/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
55 #include "tm-constrs.h"
59 #include "sched-int.h"
63 #include "diagnostic.h"
65 enum upper_128bits_state
72 typedef struct block_info_def
74 /* State of the upper 128bits of AVX registers at exit. */
75 enum upper_128bits_state state
;
76 /* TRUE if state of the upper 128bits of AVX registers is unchanged
79 /* TRUE if block has been processed. */
81 /* TRUE if block has been scanned. */
83 /* Previous state of the upper 128bits of AVX registers at entry. */
84 enum upper_128bits_state prev
;
87 #define BLOCK_INFO(B) ((block_info) (B)->aux)
89 enum call_avx256_state
91 /* Callee returns 256bit AVX register. */
92 callee_return_avx256
= -1,
93 /* Callee returns and passes 256bit AVX register. */
94 callee_return_pass_avx256
,
95 /* Callee passes 256bit AVX register. */
97 /* Callee doesn't return nor passe 256bit AVX register, or no
98 256bit AVX register in function return. */
100 /* vzeroupper intrinsic. */
104 /* Check if a 256bit AVX register is referenced in stores. */
107 check_avx256_stores (rtx dest
, const_rtx set
, void *data
)
110 && VALID_AVX256_REG_MODE (GET_MODE (dest
)))
111 || (GET_CODE (set
) == SET
112 && REG_P (SET_SRC (set
))
113 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set
)))))
115 enum upper_128bits_state
*state
116 = (enum upper_128bits_state
*) data
;
121 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
122 in basic block BB. Delete it if upper 128bit AVX registers are
123 unused. If it isn't deleted, move it to just before a jump insn.
125 STATE is state of the upper 128bits of AVX registers at entry. */
128 move_or_delete_vzeroupper_2 (basic_block bb
,
129 enum upper_128bits_state state
)
132 rtx vzeroupper_insn
= NULL_RTX
;
137 if (BLOCK_INFO (bb
)->unchanged
)
140 fprintf (dump_file
, " [bb %i] unchanged: upper 128bits: %d\n",
143 BLOCK_INFO (bb
)->state
= state
;
147 if (BLOCK_INFO (bb
)->scanned
&& BLOCK_INFO (bb
)->prev
== state
)
150 fprintf (dump_file
, " [bb %i] scanned: upper 128bits: %d\n",
151 bb
->index
, BLOCK_INFO (bb
)->state
);
155 BLOCK_INFO (bb
)->prev
= state
;
158 fprintf (dump_file
, " [bb %i] entry: upper 128bits: %d\n",
163 /* BB_END changes when it is deleted. */
164 bb_end
= BB_END (bb
);
166 while (insn
!= bb_end
)
168 insn
= NEXT_INSN (insn
);
170 if (!NONDEBUG_INSN_P (insn
))
173 /* Move vzeroupper before jump/call. */
174 if (JUMP_P (insn
) || CALL_P (insn
))
176 if (!vzeroupper_insn
)
179 if (PREV_INSN (insn
) != vzeroupper_insn
)
183 fprintf (dump_file
, "Move vzeroupper after:\n");
184 print_rtl_single (dump_file
, PREV_INSN (insn
));
185 fprintf (dump_file
, "before:\n");
186 print_rtl_single (dump_file
, insn
);
188 reorder_insns_nobb (vzeroupper_insn
, vzeroupper_insn
,
191 vzeroupper_insn
= NULL_RTX
;
195 pat
= PATTERN (insn
);
197 /* Check insn for vzeroupper intrinsic. */
198 if (GET_CODE (pat
) == UNSPEC_VOLATILE
199 && XINT (pat
, 1) == UNSPECV_VZEROUPPER
)
203 /* Found vzeroupper intrinsic. */
204 fprintf (dump_file
, "Found vzeroupper:\n");
205 print_rtl_single (dump_file
, insn
);
210 /* Check insn for vzeroall intrinsic. */
211 if (GET_CODE (pat
) == PARALLEL
212 && GET_CODE (XVECEXP (pat
, 0, 0)) == UNSPEC_VOLATILE
213 && XINT (XVECEXP (pat
, 0, 0), 1) == UNSPECV_VZEROALL
)
218 /* Delete pending vzeroupper insertion. */
221 delete_insn (vzeroupper_insn
);
222 vzeroupper_insn
= NULL_RTX
;
225 else if (state
!= used
)
227 note_stores (pat
, check_avx256_stores
, &state
);
234 /* Process vzeroupper intrinsic. */
235 avx256
= INTVAL (XVECEXP (pat
, 0, 0));
239 /* Since the upper 128bits are cleared, callee must not pass
240 256bit AVX register. We only need to check if callee
241 returns 256bit AVX register. */
242 if (avx256
== callee_return_avx256
)
248 /* Remove unnecessary vzeroupper since upper 128bits are
252 fprintf (dump_file
, "Delete redundant vzeroupper:\n");
253 print_rtl_single (dump_file
, insn
);
259 /* Set state to UNUSED if callee doesn't return 256bit AVX
261 if (avx256
!= callee_return_pass_avx256
)
264 if (avx256
== callee_return_pass_avx256
265 || avx256
== callee_pass_avx256
)
267 /* Must remove vzeroupper since callee passes in 256bit
271 fprintf (dump_file
, "Delete callee pass vzeroupper:\n");
272 print_rtl_single (dump_file
, insn
);
278 vzeroupper_insn
= insn
;
284 BLOCK_INFO (bb
)->state
= state
;
285 BLOCK_INFO (bb
)->unchanged
= unchanged
;
286 BLOCK_INFO (bb
)->scanned
= true;
289 fprintf (dump_file
, " [bb %i] exit: %s: upper 128bits: %d\n",
290 bb
->index
, unchanged
? "unchanged" : "changed",
294 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
295 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
296 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
300 move_or_delete_vzeroupper_1 (basic_block block
, bool unknown_is_unused
)
304 enum upper_128bits_state state
, old_state
, new_state
;
308 fprintf (dump_file
, " Process [bb %i]: status: %d\n",
309 block
->index
, BLOCK_INFO (block
)->processed
);
311 if (BLOCK_INFO (block
)->processed
)
316 /* Check all predecessor edges of this block. */
317 seen_unknown
= false;
318 FOR_EACH_EDGE (e
, ei
, block
->preds
)
322 switch (BLOCK_INFO (e
->src
)->state
)
325 if (!unknown_is_unused
)
339 old_state
= BLOCK_INFO (block
)->state
;
340 move_or_delete_vzeroupper_2 (block
, state
);
341 new_state
= BLOCK_INFO (block
)->state
;
343 if (state
!= unknown
|| new_state
== used
)
344 BLOCK_INFO (block
)->processed
= true;
346 /* Need to rescan if the upper 128bits of AVX registers are changed
348 if (new_state
!= old_state
)
350 if (new_state
== used
)
351 cfun
->machine
->rescan_vzeroupper_p
= 1;
358 /* Go through the instruction stream looking for vzeroupper. Delete
359 it if upper 128bit AVX registers are unused. If it isn't deleted,
360 move it to just before a jump insn. */
363 move_or_delete_vzeroupper (void)
368 fibheap_t worklist
, pending
, fibheap_swap
;
369 sbitmap visited
, in_worklist
, in_pending
, sbitmap_swap
;
374 /* Set up block info for each basic block. */
375 alloc_aux_for_blocks (sizeof (struct block_info_def
));
377 /* Process outgoing edges of entry point. */
379 fprintf (dump_file
, "Process outgoing edges of entry point\n");
381 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
383 move_or_delete_vzeroupper_2 (e
->dest
,
384 cfun
->machine
->caller_pass_avx256_p
386 BLOCK_INFO (e
->dest
)->processed
= true;
389 /* Compute reverse completion order of depth first search of the CFG
390 so that the data-flow runs faster. */
391 rc_order
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
392 bb_order
= XNEWVEC (int, last_basic_block
);
393 pre_and_rev_post_order_compute (NULL
, rc_order
, false);
394 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
395 bb_order
[rc_order
[i
]] = i
;
398 worklist
= fibheap_new ();
399 pending
= fibheap_new ();
400 visited
= sbitmap_alloc (last_basic_block
);
401 in_worklist
= sbitmap_alloc (last_basic_block
);
402 in_pending
= sbitmap_alloc (last_basic_block
);
403 sbitmap_zero (in_worklist
);
405 /* Don't check outgoing edges of entry point. */
406 sbitmap_ones (in_pending
);
408 if (BLOCK_INFO (bb
)->processed
)
409 RESET_BIT (in_pending
, bb
->index
);
412 move_or_delete_vzeroupper_1 (bb
, false);
413 fibheap_insert (pending
, bb_order
[bb
->index
], bb
);
417 fprintf (dump_file
, "Check remaining basic blocks\n");
419 while (!fibheap_empty (pending
))
421 fibheap_swap
= pending
;
423 worklist
= fibheap_swap
;
424 sbitmap_swap
= in_pending
;
425 in_pending
= in_worklist
;
426 in_worklist
= sbitmap_swap
;
428 sbitmap_zero (visited
);
430 cfun
->machine
->rescan_vzeroupper_p
= 0;
432 while (!fibheap_empty (worklist
))
434 bb
= (basic_block
) fibheap_extract_min (worklist
);
435 RESET_BIT (in_worklist
, bb
->index
);
436 gcc_assert (!TEST_BIT (visited
, bb
->index
));
437 if (!TEST_BIT (visited
, bb
->index
))
441 SET_BIT (visited
, bb
->index
);
443 if (move_or_delete_vzeroupper_1 (bb
, false))
444 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
446 if (e
->dest
== EXIT_BLOCK_PTR
447 || BLOCK_INFO (e
->dest
)->processed
)
450 if (TEST_BIT (visited
, e
->dest
->index
))
452 if (!TEST_BIT (in_pending
, e
->dest
->index
))
454 /* Send E->DEST to next round. */
455 SET_BIT (in_pending
, e
->dest
->index
);
456 fibheap_insert (pending
,
457 bb_order
[e
->dest
->index
],
461 else if (!TEST_BIT (in_worklist
, e
->dest
->index
))
463 /* Add E->DEST to current round. */
464 SET_BIT (in_worklist
, e
->dest
->index
);
465 fibheap_insert (worklist
, bb_order
[e
->dest
->index
],
472 if (!cfun
->machine
->rescan_vzeroupper_p
)
477 fibheap_delete (worklist
);
478 fibheap_delete (pending
);
479 sbitmap_free (visited
);
480 sbitmap_free (in_worklist
);
481 sbitmap_free (in_pending
);
484 fprintf (dump_file
, "Process remaining basic blocks\n");
487 move_or_delete_vzeroupper_1 (bb
, true);
489 free_aux_for_blocks ();
492 static rtx
legitimize_dllimport_symbol (rtx
, bool);
494 #ifndef CHECK_STACK_LIMIT
495 #define CHECK_STACK_LIMIT (-1)
498 /* Return index of given mode in mult and division cost tables. */
499 #define MODE_INDEX(mode) \
500 ((mode) == QImode ? 0 \
501 : (mode) == HImode ? 1 \
502 : (mode) == SImode ? 2 \
503 : (mode) == DImode ? 3 \
506 /* Processor costs (relative to an add) */
507 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
508 #define COSTS_N_BYTES(N) ((N) * 2)
510 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
513 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
514 COSTS_N_BYTES (2), /* cost of an add instruction */
515 COSTS_N_BYTES (3), /* cost of a lea instruction */
516 COSTS_N_BYTES (2), /* variable shift costs */
517 COSTS_N_BYTES (3), /* constant shift costs */
518 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
519 COSTS_N_BYTES (3), /* HI */
520 COSTS_N_BYTES (3), /* SI */
521 COSTS_N_BYTES (3), /* DI */
522 COSTS_N_BYTES (5)}, /* other */
523 0, /* cost of multiply per each bit set */
524 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
525 COSTS_N_BYTES (3), /* HI */
526 COSTS_N_BYTES (3), /* SI */
527 COSTS_N_BYTES (3), /* DI */
528 COSTS_N_BYTES (5)}, /* other */
529 COSTS_N_BYTES (3), /* cost of movsx */
530 COSTS_N_BYTES (3), /* cost of movzx */
531 0, /* "large" insn */
533 2, /* cost for loading QImode using movzbl */
534 {2, 2, 2}, /* cost of loading integer registers
535 in QImode, HImode and SImode.
536 Relative to reg-reg move (2). */
537 {2, 2, 2}, /* cost of storing integer registers */
538 2, /* cost of reg,reg fld/fst */
539 {2, 2, 2}, /* cost of loading fp registers
540 in SFmode, DFmode and XFmode */
541 {2, 2, 2}, /* cost of storing fp registers
542 in SFmode, DFmode and XFmode */
543 3, /* cost of moving MMX register */
544 {3, 3}, /* cost of loading MMX registers
545 in SImode and DImode */
546 {3, 3}, /* cost of storing MMX registers
547 in SImode and DImode */
548 3, /* cost of moving SSE register */
549 {3, 3, 3}, /* cost of loading SSE registers
550 in SImode, DImode and TImode */
551 {3, 3, 3}, /* cost of storing SSE registers
552 in SImode, DImode and TImode */
553 3, /* MMX or SSE register to integer */
554 0, /* size of l1 cache */
555 0, /* size of l2 cache */
556 0, /* size of prefetch block */
557 0, /* number of parallel prefetches */
559 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
561 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
562 COSTS_N_BYTES (2), /* cost of FABS instruction. */
563 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
564 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
565 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
566 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
567 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
568 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 1, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 1, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
582 /* Processor costs (relative to an add) */
584 struct processor_costs i386_cost
= { /* 386 specific costs */
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (1), /* cost of a lea instruction */
587 COSTS_N_INSNS (3), /* variable shift costs */
588 COSTS_N_INSNS (2), /* constant shift costs */
589 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (6), /* HI */
591 COSTS_N_INSNS (6), /* SI */
592 COSTS_N_INSNS (6), /* DI */
593 COSTS_N_INSNS (6)}, /* other */
594 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (23), /* HI */
597 COSTS_N_INSNS (23), /* SI */
598 COSTS_N_INSNS (23), /* DI */
599 COSTS_N_INSNS (23)}, /* other */
600 COSTS_N_INSNS (3), /* cost of movsx */
601 COSTS_N_INSNS (2), /* cost of movzx */
602 15, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {2, 4, 2}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {2, 4, 2}, /* cost of storing integer registers */
609 2, /* cost of reg,reg fld/fst */
610 {8, 8, 8}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {8, 8, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 8}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 8}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 8, 16}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 8, 16}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 3, /* MMX or SSE register to integer */
625 0, /* size of l1 cache */
626 0, /* size of l2 cache */
627 0, /* size of prefetch block */
628 0, /* number of parallel prefetches */
630 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (22), /* cost of FABS instruction. */
634 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
636 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
637 DUMMY_STRINGOP_ALGS
},
638 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
639 DUMMY_STRINGOP_ALGS
},
640 1, /* scalar_stmt_cost. */
641 1, /* scalar load_cost. */
642 1, /* scalar_store_cost. */
643 1, /* vec_stmt_cost. */
644 1, /* vec_to_scalar_cost. */
645 1, /* scalar_to_vec_cost. */
646 1, /* vec_align_load_cost. */
647 2, /* vec_unalign_load_cost. */
648 1, /* vec_store_cost. */
649 3, /* cond_taken_branch_cost. */
650 1, /* cond_not_taken_branch_cost. */
654 struct processor_costs i486_cost
= { /* 486 specific costs */
655 COSTS_N_INSNS (1), /* cost of an add instruction */
656 COSTS_N_INSNS (1), /* cost of a lea instruction */
657 COSTS_N_INSNS (3), /* variable shift costs */
658 COSTS_N_INSNS (2), /* constant shift costs */
659 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
660 COSTS_N_INSNS (12), /* HI */
661 COSTS_N_INSNS (12), /* SI */
662 COSTS_N_INSNS (12), /* DI */
663 COSTS_N_INSNS (12)}, /* other */
664 1, /* cost of multiply per each bit set */
665 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
666 COSTS_N_INSNS (40), /* HI */
667 COSTS_N_INSNS (40), /* SI */
668 COSTS_N_INSNS (40), /* DI */
669 COSTS_N_INSNS (40)}, /* other */
670 COSTS_N_INSNS (3), /* cost of movsx */
671 COSTS_N_INSNS (2), /* cost of movzx */
672 15, /* "large" insn */
674 4, /* cost for loading QImode using movzbl */
675 {2, 4, 2}, /* cost of loading integer registers
676 in QImode, HImode and SImode.
677 Relative to reg-reg move (2). */
678 {2, 4, 2}, /* cost of storing integer registers */
679 2, /* cost of reg,reg fld/fst */
680 {8, 8, 8}, /* cost of loading fp registers
681 in SFmode, DFmode and XFmode */
682 {8, 8, 8}, /* cost of storing fp registers
683 in SFmode, DFmode and XFmode */
684 2, /* cost of moving MMX register */
685 {4, 8}, /* cost of loading MMX registers
686 in SImode and DImode */
687 {4, 8}, /* cost of storing MMX registers
688 in SImode and DImode */
689 2, /* cost of moving SSE register */
690 {4, 8, 16}, /* cost of loading SSE registers
691 in SImode, DImode and TImode */
692 {4, 8, 16}, /* cost of storing SSE registers
693 in SImode, DImode and TImode */
694 3, /* MMX or SSE register to integer */
695 4, /* size of l1 cache. 486 has 8kB cache
696 shared for code and data, so 4kB is
697 not really precise. */
698 4, /* size of l2 cache */
699 0, /* size of prefetch block */
700 0, /* number of parallel prefetches */
702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
703 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
704 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
705 COSTS_N_INSNS (3), /* cost of FABS instruction. */
706 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
707 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
708 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
709 DUMMY_STRINGOP_ALGS
},
710 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
711 DUMMY_STRINGOP_ALGS
},
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
726 struct processor_costs pentium_cost
= {
727 COSTS_N_INSNS (1), /* cost of an add instruction */
728 COSTS_N_INSNS (1), /* cost of a lea instruction */
729 COSTS_N_INSNS (4), /* variable shift costs */
730 COSTS_N_INSNS (1), /* constant shift costs */
731 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
732 COSTS_N_INSNS (11), /* HI */
733 COSTS_N_INSNS (11), /* SI */
734 COSTS_N_INSNS (11), /* DI */
735 COSTS_N_INSNS (11)}, /* other */
736 0, /* cost of multiply per each bit set */
737 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
738 COSTS_N_INSNS (25), /* HI */
739 COSTS_N_INSNS (25), /* SI */
740 COSTS_N_INSNS (25), /* DI */
741 COSTS_N_INSNS (25)}, /* other */
742 COSTS_N_INSNS (3), /* cost of movsx */
743 COSTS_N_INSNS (2), /* cost of movzx */
744 8, /* "large" insn */
746 6, /* cost for loading QImode using movzbl */
747 {2, 4, 2}, /* cost of loading integer registers
748 in QImode, HImode and SImode.
749 Relative to reg-reg move (2). */
750 {2, 4, 2}, /* cost of storing integer registers */
751 2, /* cost of reg,reg fld/fst */
752 {2, 2, 6}, /* cost of loading fp registers
753 in SFmode, DFmode and XFmode */
754 {4, 4, 6}, /* cost of storing fp registers
755 in SFmode, DFmode and XFmode */
756 8, /* cost of moving MMX register */
757 {8, 8}, /* cost of loading MMX registers
758 in SImode and DImode */
759 {8, 8}, /* cost of storing MMX registers
760 in SImode and DImode */
761 2, /* cost of moving SSE register */
762 {4, 8, 16}, /* cost of loading SSE registers
763 in SImode, DImode and TImode */
764 {4, 8, 16}, /* cost of storing SSE registers
765 in SImode, DImode and TImode */
766 3, /* MMX or SSE register to integer */
767 8, /* size of l1 cache. */
768 8, /* size of l2 cache */
769 0, /* size of prefetch block */
770 0, /* number of parallel prefetches */
772 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
773 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
774 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
775 COSTS_N_INSNS (1), /* cost of FABS instruction. */
776 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
777 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
778 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
779 DUMMY_STRINGOP_ALGS
},
780 {{libcall
, {{-1, rep_prefix_4_byte
}}},
781 DUMMY_STRINGOP_ALGS
},
782 1, /* scalar_stmt_cost. */
783 1, /* scalar load_cost. */
784 1, /* scalar_store_cost. */
785 1, /* vec_stmt_cost. */
786 1, /* vec_to_scalar_cost. */
787 1, /* scalar_to_vec_cost. */
788 1, /* vec_align_load_cost. */
789 2, /* vec_unalign_load_cost. */
790 1, /* vec_store_cost. */
791 3, /* cond_taken_branch_cost. */
792 1, /* cond_not_taken_branch_cost. */
796 struct processor_costs pentiumpro_cost
= {
797 COSTS_N_INSNS (1), /* cost of an add instruction */
798 COSTS_N_INSNS (1), /* cost of a lea instruction */
799 COSTS_N_INSNS (1), /* variable shift costs */
800 COSTS_N_INSNS (1), /* constant shift costs */
801 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
802 COSTS_N_INSNS (4), /* HI */
803 COSTS_N_INSNS (4), /* SI */
804 COSTS_N_INSNS (4), /* DI */
805 COSTS_N_INSNS (4)}, /* other */
806 0, /* cost of multiply per each bit set */
807 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
808 COSTS_N_INSNS (17), /* HI */
809 COSTS_N_INSNS (17), /* SI */
810 COSTS_N_INSNS (17), /* DI */
811 COSTS_N_INSNS (17)}, /* other */
812 COSTS_N_INSNS (1), /* cost of movsx */
813 COSTS_N_INSNS (1), /* cost of movzx */
814 8, /* "large" insn */
816 2, /* cost for loading QImode using movzbl */
817 {4, 4, 4}, /* cost of loading integer registers
818 in QImode, HImode and SImode.
819 Relative to reg-reg move (2). */
820 {2, 2, 2}, /* cost of storing integer registers */
821 2, /* cost of reg,reg fld/fst */
822 {2, 2, 6}, /* cost of loading fp registers
823 in SFmode, DFmode and XFmode */
824 {4, 4, 6}, /* cost of storing fp registers
825 in SFmode, DFmode and XFmode */
826 2, /* cost of moving MMX register */
827 {2, 2}, /* cost of loading MMX registers
828 in SImode and DImode */
829 {2, 2}, /* cost of storing MMX registers
830 in SImode and DImode */
831 2, /* cost of moving SSE register */
832 {2, 2, 8}, /* cost of loading SSE registers
833 in SImode, DImode and TImode */
834 {2, 2, 8}, /* cost of storing SSE registers
835 in SImode, DImode and TImode */
836 3, /* MMX or SSE register to integer */
837 8, /* size of l1 cache. */
838 256, /* size of l2 cache */
839 32, /* size of prefetch block */
840 6, /* number of parallel prefetches */
842 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
843 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
844 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
845 COSTS_N_INSNS (2), /* cost of FABS instruction. */
846 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
847 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
848 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
849 (we ensure the alignment). For small blocks inline loop is still a
850 noticeable win, for bigger blocks either rep movsl or rep movsb is
851 way to go. Rep movsb has apparently more expensive startup time in CPU,
852 but after 4K the difference is down in the noise. */
853 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
854 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
855 DUMMY_STRINGOP_ALGS
},
856 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
857 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
858 DUMMY_STRINGOP_ALGS
},
859 1, /* scalar_stmt_cost. */
860 1, /* scalar load_cost. */
861 1, /* scalar_store_cost. */
862 1, /* vec_stmt_cost. */
863 1, /* vec_to_scalar_cost. */
864 1, /* scalar_to_vec_cost. */
865 1, /* vec_align_load_cost. */
866 2, /* vec_unalign_load_cost. */
867 1, /* vec_store_cost. */
868 3, /* cond_taken_branch_cost. */
869 1, /* cond_not_taken_branch_cost. */
873 struct processor_costs geode_cost
= {
874 COSTS_N_INSNS (1), /* cost of an add instruction */
875 COSTS_N_INSNS (1), /* cost of a lea instruction */
876 COSTS_N_INSNS (2), /* variable shift costs */
877 COSTS_N_INSNS (1), /* constant shift costs */
878 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
879 COSTS_N_INSNS (4), /* HI */
880 COSTS_N_INSNS (7), /* SI */
881 COSTS_N_INSNS (7), /* DI */
882 COSTS_N_INSNS (7)}, /* other */
883 0, /* cost of multiply per each bit set */
884 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
885 COSTS_N_INSNS (23), /* HI */
886 COSTS_N_INSNS (39), /* SI */
887 COSTS_N_INSNS (39), /* DI */
888 COSTS_N_INSNS (39)}, /* other */
889 COSTS_N_INSNS (1), /* cost of movsx */
890 COSTS_N_INSNS (1), /* cost of movzx */
891 8, /* "large" insn */
893 1, /* cost for loading QImode using movzbl */
894 {1, 1, 1}, /* cost of loading integer registers
895 in QImode, HImode and SImode.
896 Relative to reg-reg move (2). */
897 {1, 1, 1}, /* cost of storing integer registers */
898 1, /* cost of reg,reg fld/fst */
899 {1, 1, 1}, /* cost of loading fp registers
900 in SFmode, DFmode and XFmode */
901 {4, 6, 6}, /* cost of storing fp registers
902 in SFmode, DFmode and XFmode */
904 1, /* cost of moving MMX register */
905 {1, 1}, /* cost of loading MMX registers
906 in SImode and DImode */
907 {1, 1}, /* cost of storing MMX registers
908 in SImode and DImode */
909 1, /* cost of moving SSE register */
910 {1, 1, 1}, /* cost of loading SSE registers
911 in SImode, DImode and TImode */
912 {1, 1, 1}, /* cost of storing SSE registers
913 in SImode, DImode and TImode */
914 1, /* MMX or SSE register to integer */
915 64, /* size of l1 cache. */
916 128, /* size of l2 cache. */
917 32, /* size of prefetch block */
918 1, /* number of parallel prefetches */
920 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
921 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
922 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
923 COSTS_N_INSNS (1), /* cost of FABS instruction. */
924 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
925 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
926 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
927 DUMMY_STRINGOP_ALGS
},
928 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
929 DUMMY_STRINGOP_ALGS
},
930 1, /* scalar_stmt_cost. */
931 1, /* scalar load_cost. */
932 1, /* scalar_store_cost. */
933 1, /* vec_stmt_cost. */
934 1, /* vec_to_scalar_cost. */
935 1, /* scalar_to_vec_cost. */
936 1, /* vec_align_load_cost. */
937 2, /* vec_unalign_load_cost. */
938 1, /* vec_store_cost. */
939 3, /* cond_taken_branch_cost. */
940 1, /* cond_not_taken_branch_cost. */
944 struct processor_costs k6_cost
= {
945 COSTS_N_INSNS (1), /* cost of an add instruction */
946 COSTS_N_INSNS (2), /* cost of a lea instruction */
947 COSTS_N_INSNS (1), /* variable shift costs */
948 COSTS_N_INSNS (1), /* constant shift costs */
949 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
950 COSTS_N_INSNS (3), /* HI */
951 COSTS_N_INSNS (3), /* SI */
952 COSTS_N_INSNS (3), /* DI */
953 COSTS_N_INSNS (3)}, /* other */
954 0, /* cost of multiply per each bit set */
955 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
956 COSTS_N_INSNS (18), /* HI */
957 COSTS_N_INSNS (18), /* SI */
958 COSTS_N_INSNS (18), /* DI */
959 COSTS_N_INSNS (18)}, /* other */
960 COSTS_N_INSNS (2), /* cost of movsx */
961 COSTS_N_INSNS (2), /* cost of movzx */
962 8, /* "large" insn */
964 3, /* cost for loading QImode using movzbl */
965 {4, 5, 4}, /* cost of loading integer registers
966 in QImode, HImode and SImode.
967 Relative to reg-reg move (2). */
968 {2, 3, 2}, /* cost of storing integer registers */
969 4, /* cost of reg,reg fld/fst */
970 {6, 6, 6}, /* cost of loading fp registers
971 in SFmode, DFmode and XFmode */
972 {4, 4, 4}, /* cost of storing fp registers
973 in SFmode, DFmode and XFmode */
974 2, /* cost of moving MMX register */
975 {2, 2}, /* cost of loading MMX registers
976 in SImode and DImode */
977 {2, 2}, /* cost of storing MMX registers
978 in SImode and DImode */
979 2, /* cost of moving SSE register */
980 {2, 2, 8}, /* cost of loading SSE registers
981 in SImode, DImode and TImode */
982 {2, 2, 8}, /* cost of storing SSE registers
983 in SImode, DImode and TImode */
984 6, /* MMX or SSE register to integer */
985 32, /* size of l1 cache. */
986 32, /* size of l2 cache. Some models
987 have integrated l2 cache, but
988 optimizing for k6 is not important
989 enough to worry about that. */
990 32, /* size of prefetch block */
991 1, /* number of parallel prefetches */
993 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
994 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
995 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
996 COSTS_N_INSNS (2), /* cost of FABS instruction. */
997 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
998 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
999 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1000 DUMMY_STRINGOP_ALGS
},
1001 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1002 DUMMY_STRINGOP_ALGS
},
1003 1, /* scalar_stmt_cost. */
1004 1, /* scalar load_cost. */
1005 1, /* scalar_store_cost. */
1006 1, /* vec_stmt_cost. */
1007 1, /* vec_to_scalar_cost. */
1008 1, /* scalar_to_vec_cost. */
1009 1, /* vec_align_load_cost. */
1010 2, /* vec_unalign_load_cost. */
1011 1, /* vec_store_cost. */
1012 3, /* cond_taken_branch_cost. */
1013 1, /* cond_not_taken_branch_cost. */
1017 struct processor_costs athlon_cost
= {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (2), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (5), /* HI */
1024 COSTS_N_INSNS (5), /* SI */
1025 COSTS_N_INSNS (5), /* DI */
1026 COSTS_N_INSNS (5)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (26), /* HI */
1030 COSTS_N_INSNS (42), /* SI */
1031 COSTS_N_INSNS (74), /* DI */
1032 COSTS_N_INSNS (74)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1037 4, /* cost for loading QImode using movzbl */
1038 {3, 4, 3}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {3, 4, 3}, /* cost of storing integer registers */
1042 4, /* cost of reg,reg fld/fst */
1043 {4, 4, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {6, 6, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 6}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 5}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 5, /* MMX or SSE register to integer */
1058 64, /* size of l1 cache. */
1059 256, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 6, /* number of parallel prefetches */
1062 5, /* Branch cost */
1063 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1064 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1065 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1066 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1067 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1068 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1069 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1070 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1071 128 bytes for memset. */
1072 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1073 DUMMY_STRINGOP_ALGS
},
1074 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1075 DUMMY_STRINGOP_ALGS
},
1076 1, /* scalar_stmt_cost. */
1077 1, /* scalar load_cost. */
1078 1, /* scalar_store_cost. */
1079 1, /* vec_stmt_cost. */
1080 1, /* vec_to_scalar_cost. */
1081 1, /* scalar_to_vec_cost. */
1082 1, /* vec_align_load_cost. */
1083 2, /* vec_unalign_load_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
1086 1, /* cond_not_taken_branch_cost. */
1090 struct processor_costs k8_cost
= {
1091 COSTS_N_INSNS (1), /* cost of an add instruction */
1092 COSTS_N_INSNS (2), /* cost of a lea instruction */
1093 COSTS_N_INSNS (1), /* variable shift costs */
1094 COSTS_N_INSNS (1), /* constant shift costs */
1095 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1096 COSTS_N_INSNS (4), /* HI */
1097 COSTS_N_INSNS (3), /* SI */
1098 COSTS_N_INSNS (4), /* DI */
1099 COSTS_N_INSNS (5)}, /* other */
1100 0, /* cost of multiply per each bit set */
1101 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1102 COSTS_N_INSNS (26), /* HI */
1103 COSTS_N_INSNS (42), /* SI */
1104 COSTS_N_INSNS (74), /* DI */
1105 COSTS_N_INSNS (74)}, /* other */
1106 COSTS_N_INSNS (1), /* cost of movsx */
1107 COSTS_N_INSNS (1), /* cost of movzx */
1108 8, /* "large" insn */
1110 4, /* cost for loading QImode using movzbl */
1111 {3, 4, 3}, /* cost of loading integer registers
1112 in QImode, HImode and SImode.
1113 Relative to reg-reg move (2). */
1114 {3, 4, 3}, /* cost of storing integer registers */
1115 4, /* cost of reg,reg fld/fst */
1116 {4, 4, 12}, /* cost of loading fp registers
1117 in SFmode, DFmode and XFmode */
1118 {6, 6, 8}, /* cost of storing fp registers
1119 in SFmode, DFmode and XFmode */
1120 2, /* cost of moving MMX register */
1121 {3, 3}, /* cost of loading MMX registers
1122 in SImode and DImode */
1123 {4, 4}, /* cost of storing MMX registers
1124 in SImode and DImode */
1125 2, /* cost of moving SSE register */
1126 {4, 3, 6}, /* cost of loading SSE registers
1127 in SImode, DImode and TImode */
1128 {4, 4, 5}, /* cost of storing SSE registers
1129 in SImode, DImode and TImode */
1130 5, /* MMX or SSE register to integer */
1131 64, /* size of l1 cache. */
1132 512, /* size of l2 cache. */
1133 64, /* size of prefetch block */
1134 /* New AMD processors never drop prefetches; if they cannot be performed
1135 immediately, they are queued. We set number of simultaneous prefetches
1136 to a large constant to reflect this (it probably is not a good idea not
1137 to limit number of prefetches at all, as their execution also takes some
1139 100, /* number of parallel prefetches */
1140 3, /* Branch cost */
1141 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1142 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1143 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1144 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1145 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1146 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1147 /* K8 has optimized REP instruction for medium sized blocks, but for very
1148 small blocks it is better to use loop. For large blocks, libcall can
1149 do nontemporary accesses and beat inline considerably. */
1150 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1151 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1152 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1153 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1154 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1155 4, /* scalar_stmt_cost. */
1156 2, /* scalar load_cost. */
1157 2, /* scalar_store_cost. */
1158 5, /* vec_stmt_cost. */
1159 0, /* vec_to_scalar_cost. */
1160 2, /* scalar_to_vec_cost. */
1161 2, /* vec_align_load_cost. */
1162 3, /* vec_unalign_load_cost. */
1163 3, /* vec_store_cost. */
1164 3, /* cond_taken_branch_cost. */
1165 2, /* cond_not_taken_branch_cost. */
1168 struct processor_costs amdfam10_cost
= {
1169 COSTS_N_INSNS (1), /* cost of an add instruction */
1170 COSTS_N_INSNS (2), /* cost of a lea instruction */
1171 COSTS_N_INSNS (1), /* variable shift costs */
1172 COSTS_N_INSNS (1), /* constant shift costs */
1173 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1174 COSTS_N_INSNS (4), /* HI */
1175 COSTS_N_INSNS (3), /* SI */
1176 COSTS_N_INSNS (4), /* DI */
1177 COSTS_N_INSNS (5)}, /* other */
1178 0, /* cost of multiply per each bit set */
1179 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1180 COSTS_N_INSNS (35), /* HI */
1181 COSTS_N_INSNS (51), /* SI */
1182 COSTS_N_INSNS (83), /* DI */
1183 COSTS_N_INSNS (83)}, /* other */
1184 COSTS_N_INSNS (1), /* cost of movsx */
1185 COSTS_N_INSNS (1), /* cost of movzx */
1186 8, /* "large" insn */
1188 4, /* cost for loading QImode using movzbl */
1189 {3, 4, 3}, /* cost of loading integer registers
1190 in QImode, HImode and SImode.
1191 Relative to reg-reg move (2). */
1192 {3, 4, 3}, /* cost of storing integer registers */
1193 4, /* cost of reg,reg fld/fst */
1194 {4, 4, 12}, /* cost of loading fp registers
1195 in SFmode, DFmode and XFmode */
1196 {6, 6, 8}, /* cost of storing fp registers
1197 in SFmode, DFmode and XFmode */
1198 2, /* cost of moving MMX register */
1199 {3, 3}, /* cost of loading MMX registers
1200 in SImode and DImode */
1201 {4, 4}, /* cost of storing MMX registers
1202 in SImode and DImode */
1203 2, /* cost of moving SSE register */
1204 {4, 4, 3}, /* cost of loading SSE registers
1205 in SImode, DImode and TImode */
1206 {4, 4, 5}, /* cost of storing SSE registers
1207 in SImode, DImode and TImode */
1208 3, /* MMX or SSE register to integer */
1210 MOVD reg64, xmmreg Double FSTORE 4
1211 MOVD reg32, xmmreg Double FSTORE 4
1213 MOVD reg64, xmmreg Double FADD 3
1215 MOVD reg32, xmmreg Double FADD 3
1217 64, /* size of l1 cache. */
1218 512, /* size of l2 cache. */
1219 64, /* size of prefetch block */
1220 /* New AMD processors never drop prefetches; if they cannot be performed
1221 immediately, they are queued. We set number of simultaneous prefetches
1222 to a large constant to reflect this (it probably is not a good idea not
1223 to limit number of prefetches at all, as their execution also takes some
1225 100, /* number of parallel prefetches */
1226 2, /* Branch cost */
1227 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1228 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1229 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1230 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1231 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1232 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1234 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1235 very small blocks it is better to use loop. For large blocks, libcall can
1236 do nontemporary accesses and beat inline considerably. */
1237 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1238 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1239 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1240 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1241 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1242 4, /* scalar_stmt_cost. */
1243 2, /* scalar load_cost. */
1244 2, /* scalar_store_cost. */
1245 6, /* vec_stmt_cost. */
1246 0, /* vec_to_scalar_cost. */
1247 2, /* scalar_to_vec_cost. */
1248 2, /* vec_align_load_cost. */
1249 2, /* vec_unalign_load_cost. */
1250 2, /* vec_store_cost. */
1251 2, /* cond_taken_branch_cost. */
1252 1, /* cond_not_taken_branch_cost. */
1255 struct processor_costs bdver1_cost
= {
1256 COSTS_N_INSNS (1), /* cost of an add instruction */
1257 COSTS_N_INSNS (1), /* cost of a lea instruction */
1258 COSTS_N_INSNS (1), /* variable shift costs */
1259 COSTS_N_INSNS (1), /* constant shift costs */
1260 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1261 COSTS_N_INSNS (4), /* HI */
1262 COSTS_N_INSNS (4), /* SI */
1263 COSTS_N_INSNS (6), /* DI */
1264 COSTS_N_INSNS (6)}, /* other */
1265 0, /* cost of multiply per each bit set */
1266 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1267 COSTS_N_INSNS (35), /* HI */
1268 COSTS_N_INSNS (51), /* SI */
1269 COSTS_N_INSNS (83), /* DI */
1270 COSTS_N_INSNS (83)}, /* other */
1271 COSTS_N_INSNS (1), /* cost of movsx */
1272 COSTS_N_INSNS (1), /* cost of movzx */
1273 8, /* "large" insn */
1275 4, /* cost for loading QImode using movzbl */
1276 {5, 5, 4}, /* cost of loading integer registers
1277 in QImode, HImode and SImode.
1278 Relative to reg-reg move (2). */
1279 {4, 4, 4}, /* cost of storing integer registers */
1280 2, /* cost of reg,reg fld/fst */
1281 {5, 5, 12}, /* cost of loading fp registers
1282 in SFmode, DFmode and XFmode */
1283 {4, 4, 8}, /* cost of storing fp registers
1284 in SFmode, DFmode and XFmode */
1285 2, /* cost of moving MMX register */
1286 {4, 4}, /* cost of loading MMX registers
1287 in SImode and DImode */
1288 {4, 4}, /* cost of storing MMX registers
1289 in SImode and DImode */
1290 2, /* cost of moving SSE register */
1291 {4, 4, 4}, /* cost of loading SSE registers
1292 in SImode, DImode and TImode */
1293 {4, 4, 4}, /* cost of storing SSE registers
1294 in SImode, DImode and TImode */
1295 2, /* MMX or SSE register to integer */
1297 MOVD reg64, xmmreg Double FSTORE 4
1298 MOVD reg32, xmmreg Double FSTORE 4
1300 MOVD reg64, xmmreg Double FADD 3
1302 MOVD reg32, xmmreg Double FADD 3
1304 16, /* size of l1 cache. */
1305 2048, /* size of l2 cache. */
1306 64, /* size of prefetch block */
1307 /* New AMD processors never drop prefetches; if they cannot be performed
1308 immediately, they are queued. We set number of simultaneous prefetches
1309 to a large constant to reflect this (it probably is not a good idea not
1310 to limit number of prefetches at all, as their execution also takes some
1312 100, /* number of parallel prefetches */
1313 2, /* Branch cost */
1314 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1315 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1316 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1317 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1318 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1319 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1321 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1322 very small blocks it is better to use loop. For large blocks, libcall
1323 can do nontemporary accesses and beat inline considerably. */
1324 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1325 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1326 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1327 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1328 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1329 6, /* scalar_stmt_cost. */
1330 4, /* scalar load_cost. */
1331 4, /* scalar_store_cost. */
1332 6, /* vec_stmt_cost. */
1333 0, /* vec_to_scalar_cost. */
1334 2, /* scalar_to_vec_cost. */
1335 4, /* vec_align_load_cost. */
1336 4, /* vec_unalign_load_cost. */
1337 4, /* vec_store_cost. */
1338 2, /* cond_taken_branch_cost. */
1339 1, /* cond_not_taken_branch_cost. */
1342 struct processor_costs bdver2_cost
= {
1343 COSTS_N_INSNS (1), /* cost of an add instruction */
1344 COSTS_N_INSNS (1), /* cost of a lea instruction */
1345 COSTS_N_INSNS (1), /* variable shift costs */
1346 COSTS_N_INSNS (1), /* constant shift costs */
1347 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1348 COSTS_N_INSNS (4), /* HI */
1349 COSTS_N_INSNS (4), /* SI */
1350 COSTS_N_INSNS (6), /* DI */
1351 COSTS_N_INSNS (6)}, /* other */
1352 0, /* cost of multiply per each bit set */
1353 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1354 COSTS_N_INSNS (35), /* HI */
1355 COSTS_N_INSNS (51), /* SI */
1356 COSTS_N_INSNS (83), /* DI */
1357 COSTS_N_INSNS (83)}, /* other */
1358 COSTS_N_INSNS (1), /* cost of movsx */
1359 COSTS_N_INSNS (1), /* cost of movzx */
1360 8, /* "large" insn */
1362 4, /* cost for loading QImode using movzbl */
1363 {5, 5, 4}, /* cost of loading integer registers
1364 in QImode, HImode and SImode.
1365 Relative to reg-reg move (2). */
1366 {4, 4, 4}, /* cost of storing integer registers */
1367 2, /* cost of reg,reg fld/fst */
1368 {5, 5, 12}, /* cost of loading fp registers
1369 in SFmode, DFmode and XFmode */
1370 {4, 4, 8}, /* cost of storing fp registers
1371 in SFmode, DFmode and XFmode */
1372 2, /* cost of moving MMX register */
1373 {4, 4}, /* cost of loading MMX registers
1374 in SImode and DImode */
1375 {4, 4}, /* cost of storing MMX registers
1376 in SImode and DImode */
1377 2, /* cost of moving SSE register */
1378 {4, 4, 4}, /* cost of loading SSE registers
1379 in SImode, DImode and TImode */
1380 {4, 4, 4}, /* cost of storing SSE registers
1381 in SImode, DImode and TImode */
1382 2, /* MMX or SSE register to integer */
1384 MOVD reg64, xmmreg Double FSTORE 4
1385 MOVD reg32, xmmreg Double FSTORE 4
1387 MOVD reg64, xmmreg Double FADD 3
1389 MOVD reg32, xmmreg Double FADD 3
1391 16, /* size of l1 cache. */
1392 2048, /* size of l2 cache. */
1393 64, /* size of prefetch block */
1394 /* New AMD processors never drop prefetches; if they cannot be performed
1395 immediately, they are queued. We set number of simultaneous prefetches
1396 to a large constant to reflect this (it probably is not a good idea not
1397 to limit number of prefetches at all, as their execution also takes some
1399 100, /* number of parallel prefetches */
1400 2, /* Branch cost */
1401 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1402 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1403 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1404 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1405 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1406 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1408 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1409 very small blocks it is better to use loop. For large blocks, libcall
1410 can do nontemporary accesses and beat inline considerably. */
1411 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1412 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1413 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1414 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1415 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1416 6, /* scalar_stmt_cost. */
1417 4, /* scalar load_cost. */
1418 4, /* scalar_store_cost. */
1419 6, /* vec_stmt_cost. */
1420 0, /* vec_to_scalar_cost. */
1421 2, /* scalar_to_vec_cost. */
1422 4, /* vec_align_load_cost. */
1423 4, /* vec_unalign_load_cost. */
1424 4, /* vec_store_cost. */
1425 2, /* cond_taken_branch_cost. */
1426 1, /* cond_not_taken_branch_cost. */
1429 struct processor_costs btver1_cost
= {
1430 COSTS_N_INSNS (1), /* cost of an add instruction */
1431 COSTS_N_INSNS (2), /* cost of a lea instruction */
1432 COSTS_N_INSNS (1), /* variable shift costs */
1433 COSTS_N_INSNS (1), /* constant shift costs */
1434 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1435 COSTS_N_INSNS (4), /* HI */
1436 COSTS_N_INSNS (3), /* SI */
1437 COSTS_N_INSNS (4), /* DI */
1438 COSTS_N_INSNS (5)}, /* other */
1439 0, /* cost of multiply per each bit set */
1440 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1441 COSTS_N_INSNS (35), /* HI */
1442 COSTS_N_INSNS (51), /* SI */
1443 COSTS_N_INSNS (83), /* DI */
1444 COSTS_N_INSNS (83)}, /* other */
1445 COSTS_N_INSNS (1), /* cost of movsx */
1446 COSTS_N_INSNS (1), /* cost of movzx */
1447 8, /* "large" insn */
1449 4, /* cost for loading QImode using movzbl */
1450 {3, 4, 3}, /* cost of loading integer registers
1451 in QImode, HImode and SImode.
1452 Relative to reg-reg move (2). */
1453 {3, 4, 3}, /* cost of storing integer registers */
1454 4, /* cost of reg,reg fld/fst */
1455 {4, 4, 12}, /* cost of loading fp registers
1456 in SFmode, DFmode and XFmode */
1457 {6, 6, 8}, /* cost of storing fp registers
1458 in SFmode, DFmode and XFmode */
1459 2, /* cost of moving MMX register */
1460 {3, 3}, /* cost of loading MMX registers
1461 in SImode and DImode */
1462 {4, 4}, /* cost of storing MMX registers
1463 in SImode and DImode */
1464 2, /* cost of moving SSE register */
1465 {4, 4, 3}, /* cost of loading SSE registers
1466 in SImode, DImode and TImode */
1467 {4, 4, 5}, /* cost of storing SSE registers
1468 in SImode, DImode and TImode */
1469 3, /* MMX or SSE register to integer */
1471 MOVD reg64, xmmreg Double FSTORE 4
1472 MOVD reg32, xmmreg Double FSTORE 4
1474 MOVD reg64, xmmreg Double FADD 3
1476 MOVD reg32, xmmreg Double FADD 3
1478 32, /* size of l1 cache. */
1479 512, /* size of l2 cache. */
1480 64, /* size of prefetch block */
1481 100, /* number of parallel prefetches */
1482 2, /* Branch cost */
1483 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1484 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1485 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1486 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1487 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1488 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1490 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1491 very small blocks it is better to use loop. For large blocks, libcall can
1492 do nontemporary accesses and beat inline considerably. */
1493 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1494 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1495 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1496 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1497 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1498 4, /* scalar_stmt_cost. */
1499 2, /* scalar load_cost. */
1500 2, /* scalar_store_cost. */
1501 6, /* vec_stmt_cost. */
1502 0, /* vec_to_scalar_cost. */
1503 2, /* scalar_to_vec_cost. */
1504 2, /* vec_align_load_cost. */
1505 2, /* vec_unalign_load_cost. */
1506 2, /* vec_store_cost. */
1507 2, /* cond_taken_branch_cost. */
1508 1, /* cond_not_taken_branch_cost. */
1512 struct processor_costs pentium4_cost
= {
1513 COSTS_N_INSNS (1), /* cost of an add instruction */
1514 COSTS_N_INSNS (3), /* cost of a lea instruction */
1515 COSTS_N_INSNS (4), /* variable shift costs */
1516 COSTS_N_INSNS (4), /* constant shift costs */
1517 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1518 COSTS_N_INSNS (15), /* HI */
1519 COSTS_N_INSNS (15), /* SI */
1520 COSTS_N_INSNS (15), /* DI */
1521 COSTS_N_INSNS (15)}, /* other */
1522 0, /* cost of multiply per each bit set */
1523 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1524 COSTS_N_INSNS (56), /* HI */
1525 COSTS_N_INSNS (56), /* SI */
1526 COSTS_N_INSNS (56), /* DI */
1527 COSTS_N_INSNS (56)}, /* other */
1528 COSTS_N_INSNS (1), /* cost of movsx */
1529 COSTS_N_INSNS (1), /* cost of movzx */
1530 16, /* "large" insn */
1532 2, /* cost for loading QImode using movzbl */
1533 {4, 5, 4}, /* cost of loading integer registers
1534 in QImode, HImode and SImode.
1535 Relative to reg-reg move (2). */
1536 {2, 3, 2}, /* cost of storing integer registers */
1537 2, /* cost of reg,reg fld/fst */
1538 {2, 2, 6}, /* cost of loading fp registers
1539 in SFmode, DFmode and XFmode */
1540 {4, 4, 6}, /* cost of storing fp registers
1541 in SFmode, DFmode and XFmode */
1542 2, /* cost of moving MMX register */
1543 {2, 2}, /* cost of loading MMX registers
1544 in SImode and DImode */
1545 {2, 2}, /* cost of storing MMX registers
1546 in SImode and DImode */
1547 12, /* cost of moving SSE register */
1548 {12, 12, 12}, /* cost of loading SSE registers
1549 in SImode, DImode and TImode */
1550 {2, 2, 8}, /* cost of storing SSE registers
1551 in SImode, DImode and TImode */
1552 10, /* MMX or SSE register to integer */
1553 8, /* size of l1 cache. */
1554 256, /* size of l2 cache. */
1555 64, /* size of prefetch block */
1556 6, /* number of parallel prefetches */
1557 2, /* Branch cost */
1558 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1559 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1560 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1561 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1562 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1563 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1564 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1565 DUMMY_STRINGOP_ALGS
},
1566 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1568 DUMMY_STRINGOP_ALGS
},
1569 1, /* scalar_stmt_cost. */
1570 1, /* scalar load_cost. */
1571 1, /* scalar_store_cost. */
1572 1, /* vec_stmt_cost. */
1573 1, /* vec_to_scalar_cost. */
1574 1, /* scalar_to_vec_cost. */
1575 1, /* vec_align_load_cost. */
1576 2, /* vec_unalign_load_cost. */
1577 1, /* vec_store_cost. */
1578 3, /* cond_taken_branch_cost. */
1579 1, /* cond_not_taken_branch_cost. */
1583 struct processor_costs nocona_cost
= {
1584 COSTS_N_INSNS (1), /* cost of an add instruction */
1585 COSTS_N_INSNS (1), /* cost of a lea instruction */
1586 COSTS_N_INSNS (1), /* variable shift costs */
1587 COSTS_N_INSNS (1), /* constant shift costs */
1588 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1589 COSTS_N_INSNS (10), /* HI */
1590 COSTS_N_INSNS (10), /* SI */
1591 COSTS_N_INSNS (10), /* DI */
1592 COSTS_N_INSNS (10)}, /* other */
1593 0, /* cost of multiply per each bit set */
1594 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1595 COSTS_N_INSNS (66), /* HI */
1596 COSTS_N_INSNS (66), /* SI */
1597 COSTS_N_INSNS (66), /* DI */
1598 COSTS_N_INSNS (66)}, /* other */
1599 COSTS_N_INSNS (1), /* cost of movsx */
1600 COSTS_N_INSNS (1), /* cost of movzx */
1601 16, /* "large" insn */
1602 17, /* MOVE_RATIO */
1603 4, /* cost for loading QImode using movzbl */
1604 {4, 4, 4}, /* cost of loading integer registers
1605 in QImode, HImode and SImode.
1606 Relative to reg-reg move (2). */
1607 {4, 4, 4}, /* cost of storing integer registers */
1608 3, /* cost of reg,reg fld/fst */
1609 {12, 12, 12}, /* cost of loading fp registers
1610 in SFmode, DFmode and XFmode */
1611 {4, 4, 4}, /* cost of storing fp registers
1612 in SFmode, DFmode and XFmode */
1613 6, /* cost of moving MMX register */
1614 {12, 12}, /* cost of loading MMX registers
1615 in SImode and DImode */
1616 {12, 12}, /* cost of storing MMX registers
1617 in SImode and DImode */
1618 6, /* cost of moving SSE register */
1619 {12, 12, 12}, /* cost of loading SSE registers
1620 in SImode, DImode and TImode */
1621 {12, 12, 12}, /* cost of storing SSE registers
1622 in SImode, DImode and TImode */
1623 8, /* MMX or SSE register to integer */
1624 8, /* size of l1 cache. */
1625 1024, /* size of l2 cache. */
1626 128, /* size of prefetch block */
1627 8, /* number of parallel prefetches */
1628 1, /* Branch cost */
1629 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1630 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1631 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1632 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1633 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1634 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1635 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1636 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1637 {100000, unrolled_loop
}, {-1, libcall
}}}},
1638 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1640 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1641 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1642 1, /* scalar_stmt_cost. */
1643 1, /* scalar load_cost. */
1644 1, /* scalar_store_cost. */
1645 1, /* vec_stmt_cost. */
1646 1, /* vec_to_scalar_cost. */
1647 1, /* scalar_to_vec_cost. */
1648 1, /* vec_align_load_cost. */
1649 2, /* vec_unalign_load_cost. */
1650 1, /* vec_store_cost. */
1651 3, /* cond_taken_branch_cost. */
1652 1, /* cond_not_taken_branch_cost. */
1656 struct processor_costs atom_cost
= {
1657 COSTS_N_INSNS (1), /* cost of an add instruction */
1658 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1659 COSTS_N_INSNS (1), /* variable shift costs */
1660 COSTS_N_INSNS (1), /* constant shift costs */
1661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1662 COSTS_N_INSNS (4), /* HI */
1663 COSTS_N_INSNS (3), /* SI */
1664 COSTS_N_INSNS (4), /* DI */
1665 COSTS_N_INSNS (2)}, /* other */
1666 0, /* cost of multiply per each bit set */
1667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1668 COSTS_N_INSNS (26), /* HI */
1669 COSTS_N_INSNS (42), /* SI */
1670 COSTS_N_INSNS (74), /* DI */
1671 COSTS_N_INSNS (74)}, /* other */
1672 COSTS_N_INSNS (1), /* cost of movsx */
1673 COSTS_N_INSNS (1), /* cost of movzx */
1674 8, /* "large" insn */
1675 17, /* MOVE_RATIO */
1676 4, /* cost for loading QImode using movzbl */
1677 {4, 4, 4}, /* cost of loading integer registers
1678 in QImode, HImode and SImode.
1679 Relative to reg-reg move (2). */
1680 {4, 4, 4}, /* cost of storing integer registers */
1681 4, /* cost of reg,reg fld/fst */
1682 {12, 12, 12}, /* cost of loading fp registers
1683 in SFmode, DFmode and XFmode */
1684 {6, 6, 8}, /* cost of storing fp registers
1685 in SFmode, DFmode and XFmode */
1686 2, /* cost of moving MMX register */
1687 {8, 8}, /* cost of loading MMX registers
1688 in SImode and DImode */
1689 {8, 8}, /* cost of storing MMX registers
1690 in SImode and DImode */
1691 2, /* cost of moving SSE register */
1692 {8, 8, 8}, /* cost of loading SSE registers
1693 in SImode, DImode and TImode */
1694 {8, 8, 8}, /* cost of storing SSE registers
1695 in SImode, DImode and TImode */
1696 5, /* MMX or SSE register to integer */
1697 32, /* size of l1 cache. */
1698 256, /* size of l2 cache. */
1699 64, /* size of prefetch block */
1700 6, /* number of parallel prefetches */
1701 3, /* Branch cost */
1702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1703 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1704 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1705 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1706 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1707 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1708 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1709 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1710 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1711 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1712 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1713 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1714 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1715 1, /* scalar_stmt_cost. */
1716 1, /* scalar load_cost. */
1717 1, /* scalar_store_cost. */
1718 1, /* vec_stmt_cost. */
1719 1, /* vec_to_scalar_cost. */
1720 1, /* scalar_to_vec_cost. */
1721 1, /* vec_align_load_cost. */
1722 2, /* vec_unalign_load_cost. */
1723 1, /* vec_store_cost. */
1724 3, /* cond_taken_branch_cost. */
1725 1, /* cond_not_taken_branch_cost. */
1728 /* Generic64 should produce code tuned for Nocona and K8. */
1730 struct processor_costs generic64_cost
= {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 /* On all chips taken into consideration lea is 2 cycles and more. With
1733 this cost however our current implementation of synth_mult results in
1734 use of unnecessary temporary registers causing regression on several
1735 SPECfp benchmarks. */
1736 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1737 COSTS_N_INSNS (1), /* variable shift costs */
1738 COSTS_N_INSNS (1), /* constant shift costs */
1739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1740 COSTS_N_INSNS (4), /* HI */
1741 COSTS_N_INSNS (3), /* SI */
1742 COSTS_N_INSNS (4), /* DI */
1743 COSTS_N_INSNS (2)}, /* other */
1744 0, /* cost of multiply per each bit set */
1745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1746 COSTS_N_INSNS (26), /* HI */
1747 COSTS_N_INSNS (42), /* SI */
1748 COSTS_N_INSNS (74), /* DI */
1749 COSTS_N_INSNS (74)}, /* other */
1750 COSTS_N_INSNS (1), /* cost of movsx */
1751 COSTS_N_INSNS (1), /* cost of movzx */
1752 8, /* "large" insn */
1753 17, /* MOVE_RATIO */
1754 4, /* cost for loading QImode using movzbl */
1755 {4, 4, 4}, /* cost of loading integer registers
1756 in QImode, HImode and SImode.
1757 Relative to reg-reg move (2). */
1758 {4, 4, 4}, /* cost of storing integer registers */
1759 4, /* cost of reg,reg fld/fst */
1760 {12, 12, 12}, /* cost of loading fp registers
1761 in SFmode, DFmode and XFmode */
1762 {6, 6, 8}, /* cost of storing fp registers
1763 in SFmode, DFmode and XFmode */
1764 2, /* cost of moving MMX register */
1765 {8, 8}, /* cost of loading MMX registers
1766 in SImode and DImode */
1767 {8, 8}, /* cost of storing MMX registers
1768 in SImode and DImode */
1769 2, /* cost of moving SSE register */
1770 {8, 8, 8}, /* cost of loading SSE registers
1771 in SImode, DImode and TImode */
1772 {8, 8, 8}, /* cost of storing SSE registers
1773 in SImode, DImode and TImode */
1774 5, /* MMX or SSE register to integer */
1775 32, /* size of l1 cache. */
1776 512, /* size of l2 cache. */
1777 64, /* size of prefetch block */
1778 6, /* number of parallel prefetches */
1779 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1780 value is increased to perhaps more appropriate value of 5. */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {DUMMY_STRINGOP_ALGS
,
1789 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1790 {DUMMY_STRINGOP_ALGS
,
1791 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1792 1, /* scalar_stmt_cost. */
1793 1, /* scalar load_cost. */
1794 1, /* scalar_store_cost. */
1795 1, /* vec_stmt_cost. */
1796 1, /* vec_to_scalar_cost. */
1797 1, /* scalar_to_vec_cost. */
1798 1, /* vec_align_load_cost. */
1799 2, /* vec_unalign_load_cost. */
1800 1, /* vec_store_cost. */
1801 3, /* cond_taken_branch_cost. */
1802 1, /* cond_not_taken_branch_cost. */
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
1808 struct processor_costs generic32_cost
= {
1809 COSTS_N_INSNS (1), /* cost of an add instruction */
1810 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1811 COSTS_N_INSNS (1), /* variable shift costs */
1812 COSTS_N_INSNS (1), /* constant shift costs */
1813 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1814 COSTS_N_INSNS (4), /* HI */
1815 COSTS_N_INSNS (3), /* SI */
1816 COSTS_N_INSNS (4), /* DI */
1817 COSTS_N_INSNS (2)}, /* other */
1818 0, /* cost of multiply per each bit set */
1819 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1820 COSTS_N_INSNS (26), /* HI */
1821 COSTS_N_INSNS (42), /* SI */
1822 COSTS_N_INSNS (74), /* DI */
1823 COSTS_N_INSNS (74)}, /* other */
1824 COSTS_N_INSNS (1), /* cost of movsx */
1825 COSTS_N_INSNS (1), /* cost of movzx */
1826 8, /* "large" insn */
1827 17, /* MOVE_RATIO */
1828 4, /* cost for loading QImode using movzbl */
1829 {4, 4, 4}, /* cost of loading integer registers
1830 in QImode, HImode and SImode.
1831 Relative to reg-reg move (2). */
1832 {4, 4, 4}, /* cost of storing integer registers */
1833 4, /* cost of reg,reg fld/fst */
1834 {12, 12, 12}, /* cost of loading fp registers
1835 in SFmode, DFmode and XFmode */
1836 {6, 6, 8}, /* cost of storing fp registers
1837 in SFmode, DFmode and XFmode */
1838 2, /* cost of moving MMX register */
1839 {8, 8}, /* cost of loading MMX registers
1840 in SImode and DImode */
1841 {8, 8}, /* cost of storing MMX registers
1842 in SImode and DImode */
1843 2, /* cost of moving SSE register */
1844 {8, 8, 8}, /* cost of loading SSE registers
1845 in SImode, DImode and TImode */
1846 {8, 8, 8}, /* cost of storing SSE registers
1847 in SImode, DImode and TImode */
1848 5, /* MMX or SSE register to integer */
1849 32, /* size of l1 cache. */
1850 256, /* size of l2 cache. */
1851 64, /* size of prefetch block */
1852 6, /* number of parallel prefetches */
1853 3, /* Branch cost */
1854 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1855 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1856 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1857 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1858 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1859 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1860 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1861 DUMMY_STRINGOP_ALGS
},
1862 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1863 DUMMY_STRINGOP_ALGS
},
1864 1, /* scalar_stmt_cost. */
1865 1, /* scalar load_cost. */
1866 1, /* scalar_store_cost. */
1867 1, /* vec_stmt_cost. */
1868 1, /* vec_to_scalar_cost. */
1869 1, /* scalar_to_vec_cost. */
1870 1, /* vec_align_load_cost. */
1871 2, /* vec_unalign_load_cost. */
1872 1, /* vec_store_cost. */
1873 3, /* cond_taken_branch_cost. */
1874 1, /* cond_not_taken_branch_cost. */
/* Cost table describing the processor we are currently generating code
   for.  Defaults to the Pentium costs; presumably re-pointed at the
   processor_target_table entry selected by -mtune during option
   processing -- TODO confirm (the override code is outside this view).  */
1877 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1879 /* Processor feature/optimization bitmasks.  Each m_* macro is a
   one-bit mask selecting a single member of the PROCESSOR_* enumeration
   (or a union of such bits for processor families); the tuning tables
   below OR them together to state which CPUs each flag applies to.  */
1880 #define m_386 (1<<PROCESSOR_I386)
1881 #define m_486 (1<<PROCESSOR_I486)
1882 #define m_PENT (1<<PROCESSOR_PENTIUM)
1883 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1884 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1885 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1886 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1887 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1888 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1889 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1890 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1891 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1892 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1893 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1894 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1895 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD and related processors.  */
1897 #define m_GEODE (1<<PROCESSOR_GEODE)
1898 #define m_K6 (1<<PROCESSOR_K6)
1899 #define m_K6_GEODE (m_K6 | m_GEODE)
1900 #define m_K8 (1<<PROCESSOR_K8)
1901 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1902 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1903 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1904 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1905 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1906 #define m_BDVER (m_BDVER1 | m_BDVER2)
1907 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1908 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1910 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1911 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1913 /* Generic instruction choice should be common subset of supported CPUs
1914 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1915 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1917 /* Feature tests against the various tunings.  One flag per
   X86_TUNE_* entry; presumably filled in from
   initial_ix86_tune_features for the selected -mtune processor during
   option override -- TODO confirm, the init code is outside this view. */
1918 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1920 /* Feature tests against the various tunings used to create ix86_tune_features
1921 based on the processor mask. */
1922 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1923 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1924 negatively, so enabling for Generic64 seems like good code size
1925 tradeoff. We can't enable it for 32bit generic because it does not
1926 work well with PPro base chips. */
1927 m_386
| m_CORE2I7_64
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1929 /* X86_TUNE_PUSH_MEMORY */
1930 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1932 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1935 /* X86_TUNE_UNROLL_STRLEN */
1936 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1938 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1939 on simulation result. But after P4 was made, no performance benefit
1940 was observed with branch hints. It also increases the code size.
1941 As a result, icc never generates branch hints. */
1944 /* X86_TUNE_DOUBLE_WITH_ADD */
1947 /* X86_TUNE_USE_SAHF */
1948 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC
,
1950 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1951 partial dependencies. */
1952 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1954 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1955 register stalls on Generic32 compilation setting as well. However
1956 in current implementation the partial register stalls are not eliminated
1957 very well - they can be introduced via subregs synthesized by combine
1958 and can happen in caller/callee saving sequences. Because this option
1959 pays back little on PPro based chips and is in conflict with partial reg
1960 dependencies used by Athlon/P4 based chips, it is better to leave it off
1961 for generic32 for now. */
1964 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1965 m_CORE2I7
| m_GENERIC
,
1967 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1968 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1969 m_CORE2I7
| m_GENERIC
,
1971 /* X86_TUNE_USE_HIMODE_FIOP */
1972 m_386
| m_486
| m_K6_GEODE
,
1974 /* X86_TUNE_USE_SIMODE_FIOP */
1975 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1977 /* X86_TUNE_USE_MOV0 */
1980 /* X86_TUNE_USE_CLTD */
1981 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
1983 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1986 /* X86_TUNE_SPLIT_LONG_MOVES */
1989 /* X86_TUNE_READ_MODIFY_WRITE */
1992 /* X86_TUNE_READ_MODIFY */
1995 /* X86_TUNE_PROMOTE_QIMODE */
1996 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1998 /* X86_TUNE_FAST_PREFIX */
1999 ~(m_386
| m_486
| m_PENT
),
2001 /* X86_TUNE_SINGLE_STRINGOP */
2002 m_386
| m_P4_NOCONA
,
2004 /* X86_TUNE_QIMODE_MATH */
2007 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2008 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2009 might be considered for Generic32 if our scheme for avoiding partial
2010 stalls was more effective. */
2013 /* X86_TUNE_PROMOTE_QI_REGS */
2016 /* X86_TUNE_PROMOTE_HI_REGS */
2019 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2020 over esp addition. */
2021 m_386
| m_486
| m_PENT
| m_PPRO
,
2023 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2024 over esp addition. */
2027 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2028 over esp subtraction. */
2029 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2031 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2032 over esp subtraction. */
2033 m_PENT
| m_K6_GEODE
,
2035 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2036 for DFmode copies */
2037 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
2039 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2040 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2042 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2043 conflict here in between PPro/Pentium4 based chips that thread 128bit
2044 SSE registers as single units versus K8 based chips that divide SSE
2045 registers to two 64bit halves. This knob promotes all store destinations
2046 to be 128bit to allow register renaming on 128bit SSE units, but usually
2047 results in one extra microop on 64bit SSE units. Experimental results
2048 shows that disabling this option on P4 brings over 20% SPECfp regression,
2049 while enabling it on K8 brings roughly 2.4% regression that can be partly
2050 masked by careful scheduling of moves. */
2051 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2053 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2054 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER1
,
2056 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2059 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2062 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2063 are resolved on SSE register parts instead of whole registers, so we may
2064 maintain just lower part of scalar values in proper format leaving the
2065 upper part undefined. */
2068 /* X86_TUNE_SSE_TYPELESS_STORES */
2071 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2072 m_PPRO
| m_P4_NOCONA
,
2074 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2075 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2077 /* X86_TUNE_PROLOGUE_USING_MOVE */
2078 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2080 /* X86_TUNE_EPILOGUE_USING_MOVE */
2081 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2083 /* X86_TUNE_SHIFT1 */
2086 /* X86_TUNE_USE_FFREEP */
2089 /* X86_TUNE_INTER_UNIT_MOVES */
2090 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2092 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2093 ~(m_AMDFAM10
| m_BDVER
),
2095 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2096 than 4 branch instructions in the 16 byte window. */
2097 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2099 /* X86_TUNE_SCHEDULE */
2100 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2102 /* X86_TUNE_USE_BT */
2103 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2105 /* X86_TUNE_USE_INCDEC */
2106 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
2108 /* X86_TUNE_PAD_RETURNS */
2109 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
2111 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
2114 /* X86_TUNE_EXT_80387_CONSTANTS */
2115 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2117 /* X86_TUNE_SHORTEN_X87_SSE */
2120 /* X86_TUNE_AVOID_VECTOR_DECODE */
2121 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
2123 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2124 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2127 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2128 vector path on AMD machines. */
2129 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC64
,
2131 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2133 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC64
,
2135 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2139 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2140 but one byte longer. */
2143 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2144 operand that cannot be represented using a modRM byte. The XOR
2145 replacement is long decoded, so this split helps here as well. */
2148 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2150 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
2152 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2153 from integer to FP. */
2156 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2157 with a subsequent conditional jump instruction into a single
2158 compare-and-branch uop. */
2161 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2162 will impact LEA instruction selection. */
2165 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2169 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2170 at -O3. For the moment, the prefetching seems badly tuned for Intel
2172 m_K6_GEODE
| m_AMD_MULTIPLE
,
2174 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2175 the auto-vectorizer. */
2178 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2179 during reassociation of integer computation. */
2182 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2183 during reassociation of fp computation. */
2187 /* Feature tests against the various architecture variations.  One
   flag per X86_ARCH_* entry; presumably derived from
   initial_ix86_arch_features for the selected -march processor -- TODO
   confirm, the init code is outside this view. */
2188 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2190 /* Feature tests against the various architecture variations, used to create
2191 ix86_arch_features based on the processor mask. */
2192 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2193 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2194 ~(m_386
| m_486
| m_PENT
| m_K6
),
/* NOTE(review): the mask values for the remaining entries below appear
   to be missing from this copy of the file -- verify against upstream
   (each should be ~m_386 or similar "everything after <cpu>" masks).  */
2196 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2199 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2202 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2205 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors on which accumulating outgoing arguments
   (-maccumulate-outgoing-args) is preferable by default.  */
2209 static const unsigned int x86_accumulate_outgoing_args
2210 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors whose x87 unit is assumed to implement the "fancy" math
   instructions (fsin, fcos, fsqrt, ...) efficiently; presumably
   consulted when deciding the default for -mno-fancy-math-387 -- TODO
   confirm in the option-override code.  */
2212 static const unsigned int x86_arch_always_fancy_math_387
2213 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors on which unaligned 256-bit AVX loads should be split into
   two 128-bit halves (-mavx256-split-unaligned-load default).  */
2215 static const unsigned int x86_avx256_split_unaligned_load
2216 = m_COREI7
| m_GENERIC
;
/* Processors on which unaligned 256-bit AVX stores should be split into
   two 128-bit halves (-mavx256-split-unaligned-store default).  */
2218 static const unsigned int x86_avx256_split_unaligned_store
2219 = m_COREI7
| m_BDVER
| m_GENERIC
;
2221 /* In case the average insn count for single function invocation is
2222 lower than this constant, emit fast (but longer) prologue and
2224 #define FAST_PROLOGUE_INSN_COUNT 20
2226 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The actual name lists come from the QI_*/HI_* macros in i386.h.  */
2227 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2228 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2229 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2231 /* Array of the smallest class containing reg number REGNO, indexed by
2232 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2234 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2236 /* ax, dx, cx, bx */
2237 AREG
, DREG
, CREG
, BREG
,
2238 /* si, di, bp, sp */
2239 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2241 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2242 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2245 /* flags, fpsr, fpcr, frame */
2246 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2248 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2251 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2254 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2255 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2256 /* SSE REX registers */
2257 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2261 /* The "default" register map used in 32bit mode.  Maps GCC hard
   register numbers to debug-info (DBX/stabs) register numbers; -1
   marks registers with no encoding in this map.  */
2263 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2274 /* The "default" register map used in 64bit mode.  Same layout as
   dbx_register_map above, but with 64-bit debug numbers and valid
   entries for the REX-extended integer and SSE registers.  */
2276 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2278 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2279 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2280 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2281 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2282 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2283 8,9,10,11,12,13,14,15, /* extended integer registers */
2284 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2287 /* Define the register numbers to be used in Dwarf debugging information.
2288 The SVR4 reference port C compiler uses the following register numbers
2289 in its Dwarf output code:
2290 0 for %eax (gcc regno = 0)
2291 1 for %ecx (gcc regno = 2)
2292 2 for %edx (gcc regno = 1)
2293 3 for %ebx (gcc regno = 3)
2294 4 for %esp (gcc regno = 7)
2295 5 for %ebp (gcc regno = 6)
2296 6 for %esi (gcc regno = 4)
2297 7 for %edi (gcc regno = 5)
2298 The following three DWARF register numbers are never generated by
2299 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2300 believes these numbers have these meanings.
2301 8 for %eip (no gcc equivalent)
2302 9 for %eflags (gcc regno = 17)
2303 10 for %trapno (no gcc equivalent)
2304 It is not at all clear how we should number the FP stack registers
2305 for the x86 architecture. If the version of SDB on x86/svr4 were
2306 a bit less brain dead with respect to floating-point then we would
2307 have a precedent to follow with respect to DWARF register numbers
2308 for x86 FP registers, but the SDB on x86/svr4 is so completely
2309 broken with respect to FP registers that it is hardly worth thinking
2310 of it as something to strive for compatibility with.
2311 The version of x86/svr4 SDB I have at the moment does (partially)
2312 seem to believe that DWARF register number 11 is associated with
2313 the x86 register %st(0), but that's about all. Higher DWARF
2314 register numbers don't seem to be associated with anything in
2315 particular, and even for DWARF regno 11, SDB only seems to under-
2316 stand that it should say that a variable lives in %st(0) (when
2317 asked via an `=' command) if we said it was in DWARF regno 11,
2318 but SDB still prints garbage when asked for the value of the
2319 variable in question (via a `/' command).
2320 (Also note that the labels SDB prints for various FP stack regs
2321 when doing an `x' command are all wrong.)
2322 Note that these problems generally don't affect the native SVR4
2323 C compiler because it doesn't allow the use of -O with -g and
2324 because when it is *not* optimizing, it allocates a memory
2325 location for each floating-point variable, and the memory
2326 location is what gets described in the DWARF AT_location
2327 attribute for the variable in question.
2328 Regardless of the severe mental illness of the x86/svr4 SDB, we
2329 do something sensible here and we use the following DWARF
2330 register numbers. Note that these are all stack-top-relative
2332 11 for %st(0) (gcc regno = 8)
2333 12 for %st(1) (gcc regno = 9)
2334 13 for %st(2) (gcc regno = 10)
2335 14 for %st(3) (gcc regno = 11)
2336 15 for %st(4) (gcc regno = 12)
2337 16 for %st(5) (gcc regno = 13)
2338 17 for %st(6) (gcc regno = 14)
2339 18 for %st(7) (gcc regno = 15)
/* SVR4/DWARF register numbering; see the large comment above for the
   rationale behind the FP-stack and flags encodings.  */
2341 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2343 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2344 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2345 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2346 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2347 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2348 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2349 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2352 /* Define parameter passing and return registers. */
/* Integer argument registers in System V x86-64 ABI order:
   rdi, rsi, rdx, rcx, r8, r9.  */
2354 static int const x86_64_int_parameter_registers
[6] =
2356 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
/* Integer argument registers in Microsoft x64 ABI order:
   rcx, rdx, r8, r9.  */
2359 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2361 CX_REG
, DX_REG
, R8_REG
, R9_REG
/* Integer value-return registers: rax, rdx, plus rdi and rsi --
   presumably the extra two cover additional return conventions; TODO
   confirm against the return-value classification code.  */
2364 static int const x86_64_int_return_registers
[4] =
2366 AX_REG
, DX_REG
, DI_REG
, SI_REG
2369 /* Define the structure for the machine field in struct function. */
2371 struct GTY(()) stack_local_entry
{
2372 unsigned short mode
;
2375 struct stack_local_entry
*next
;
2378 /* Structure describing stack frame layout.
2379 Stack grows downward:
2385 saved static chain if ix86_static_chain_on_stack
2387 saved frame pointer if frame_pointer_needed
2388 <- HARD_FRAME_POINTER
2394 <- sse_regs_save_offset
2397 [va_arg registers] |
2401 [padding2] | = to_allocate
2410 int outgoing_arguments_size
;
2412 /* The offsets relative to ARG_POINTER. */
2413 HOST_WIDE_INT frame_pointer_offset
;
2414 HOST_WIDE_INT hard_frame_pointer_offset
;
2415 HOST_WIDE_INT stack_pointer_offset
;
2416 HOST_WIDE_INT hfp_save_offset
;
2417 HOST_WIDE_INT reg_save_offset
;
2418 HOST_WIDE_INT sse_reg_save_offset
;
2420 /* When save_regs_using_mov is set, emit prologue using
2421 move instead of push instructions. */
2422 bool save_regs_using_mov
;
2425 /* Which cpu are we scheduling for (instruction scheduler model). */
2426 enum attr_cpu ix86_schedule
;
2428 /* Which cpu are we optimizing for (-mtune). */
2429 enum processor_type ix86_tune
;
2431 /* Which instruction set architecture to use (-march). */
2432 enum processor_type ix86_arch
;
2434 /* Nonzero if the SSE prefetch instruction is usable, i.e. not a NOP. */
2435 int x86_prefetch_sse
;
2437 /* Name of the function attribute corresponding to the -mstackrealign
   option. */
2438 static const char ix86_force_align_arg_pointer_string
[]
2439 = "force_align_arg_pointer";
/* Code-generation helper hooks.  Each pointer is presumably set to the
   SImode or DImode gen_* insn pattern depending on TARGET_64BIT -- TODO
   confirm in the initialization code (outside this view).  */
2441 static rtx (*ix86_gen_leave
) (void);
2442 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2443 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2444 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2445 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2446 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2447 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2448 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2449 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2450 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2451 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2452 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2454 /* Preferred alignment for stack boundary in bits. */
2455 unsigned int ix86_preferred_stack_boundary
;
2457 /* Alignment for incoming stack boundary in bits specified by the
   user on the command line. */
2459 static unsigned int ix86_user_incoming_stack_boundary
;
2461 /* Default alignment for incoming stack boundary in bits. */
2462 static unsigned int ix86_default_incoming_stack_boundary
;
2464 /* Alignment for incoming stack boundary in bits. */
2465 unsigned int ix86_incoming_stack_boundary
;
2467 /* Calling abi specific va_list type nodes. */
2468 static GTY(()) tree sysv_va_list_type_node
;
2469 static GTY(()) tree ms_va_list_type_node
;
2471 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2472 char internal_label_prefix
[16];
2473 int internal_label_prefix_len
;
2475 /* Fence to use after loop using movnt. */
2478 /* Register class used for passing given 64bit part of the argument.
2479 These represent classes as documented by the PS ABI, with the exception
2480 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2481 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2483 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2484 whenever possible (upper half does contain padding). */
2485 enum x86_64_reg_class
2488 X86_64_INTEGER_CLASS
,
2489 X86_64_INTEGERSI_CLASS
,
2496 X86_64_COMPLEX_X87_CLASS
,
2500 #define MAX_CLASSES 4
2502 /* Table of constants used by fldpi, fldln2, etc....  Presumably
   filled in lazily, with ext_80387_constants_init recording whether it
   has been initialized -- TODO confirm, the initializer is outside
   this view. */
2503 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2504 static bool ext_80387_constants_init
= 0;
2507 static struct machine_function
* ix86_init_machine_status (void);
2508 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2509 static bool ix86_function_value_regno_p (const unsigned int);
2510 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2512 static rtx
ix86_static_chain (const_tree
, bool);
2513 static int ix86_function_regparm (const_tree
, const_tree
);
2514 static void ix86_compute_frame_layout (struct ix86_frame
*);
2515 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2517 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2518 static tree
ix86_canonical_va_list_type (tree
);
2519 static void predict_jump (int);
2520 static unsigned int split_stack_prologue_scratch_regno (void);
2521 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2523 enum ix86_function_specific_strings
2525 IX86_FUNCTION_SPECIFIC_ARCH
,
2526 IX86_FUNCTION_SPECIFIC_TUNE
,
2527 IX86_FUNCTION_SPECIFIC_MAX
2530 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2531 const char *, enum fpmath_unit
, bool);
2532 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2533 static void ix86_function_specific_save (struct cl_target_option
*);
2534 static void ix86_function_specific_restore (struct cl_target_option
*);
2535 static void ix86_function_specific_print (FILE *, int,
2536 struct cl_target_option
*);
2537 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2538 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2539 struct gcc_options
*);
2540 static bool ix86_can_inline_p (tree
, tree
);
2541 static void ix86_set_current_function (tree
);
2542 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2544 static enum calling_abi
ix86_function_abi (const_tree
);
2547 #ifndef SUBTARGET32_DEFAULT_CPU
2548 #define SUBTARGET32_DEFAULT_CPU "i386"
2551 /* The svr4 ABI for the i386 says that records and unions are returned
2553 #ifndef DEFAULT_PCC_STRUCT_RETURN
2554 #define DEFAULT_PCC_STRUCT_RETURN 1
2557 /* Whether -mtune= or -march= were specified on the command line (as
   opposed to being defaulted). */
2558 static int ix86_tune_defaulted
;
2559 static int ix86_arch_specified
;
2561 /* Vectorization library interface and handlers. */
2562 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2564 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2565 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2567 /* Processor target table, indexed by processor number */
2570 const struct processor_costs
*cost
; /* Processor costs */
2571 const int align_loop
; /* Default alignments. */
2572 const int align_loop_max_skip
;
2573 const int align_jump
;
2574 const int align_jump_max_skip
;
2575 const int align_func
;
/* Per-processor target parameters: {cost table, align_loop,
   align_loop_max_skip, align_jump, align_jump_max_skip, align_func}
   (see the struct ptt member list above).  Rows are presumably indexed
   by the PROCESSOR_* enumeration -- confirm the order against it.  */
2578 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2580 {&i386_cost
, 4, 3, 4, 3, 4},
2581 {&i486_cost
, 16, 15, 16, 15, 16},
2582 {&pentium_cost
, 16, 7, 16, 7, 16},
2583 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2584 {&geode_cost
, 0, 0, 0, 0, 0},
2585 {&k6_cost
, 32, 7, 32, 7, 32},
2586 {&athlon_cost
, 16, 7, 16, 7, 16},
2587 {&pentium4_cost
, 0, 0, 0, 0, 0},
2588 {&k8_cost
, 16, 7, 16, 7, 16},
2589 {&nocona_cost
, 0, 0, 0, 0, 0},
2590 /* Core 2 32-bit. */
2591 {&generic32_cost
, 16, 10, 16, 10, 16},
2592 /* Core 2 64-bit. */
2593 {&generic64_cost
, 16, 10, 16, 10, 16},
2594 /* Core i7 32-bit. */
2595 {&generic32_cost
, 16, 10, 16, 10, 16},
2596 /* Core i7 64-bit. */
2597 {&generic64_cost
, 16, 10, 16, 10, 16},
/* Generic 32-bit. */
2598 {&generic32_cost
, 16, 7, 16, 7, 16},
/* Generic 64-bit. */
2599 {&generic64_cost
, 16, 10, 16, 10, 16},
2600 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2601 {&bdver1_cost
, 32, 24, 32, 7, 32},
2602 {&bdver2_cost
, 32, 24, 32, 7, 32},
2603 {&btver1_cost
, 32, 24, 32, 7, 32},
2604 {&atom_cost
, 16, 15, 16, 7, 16}
2607 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2637 /* Return true if a red-zone is in use.  The red zone exists only when
   TARGET_RED_ZONE is enabled and we are not targeting the 64-bit MS
   ABI (which defines no red zone). */
2640 ix86_using_red_zone (void)
2642 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2645 /* Return a string that documents the current -m options. The caller is
2646 responsible for freeing the string. */
2649 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2650 const char *tune
, enum fpmath_unit fpmath
,
2653 struct ix86_target_opts
2655 const char *option
; /* option string */
2656 HOST_WIDE_INT mask
; /* isa mask options */
2659 /* This table is ordered so that options like -msse4.2 that imply
2660 preceding options while match those first. */
2661 static struct ix86_target_opts isa_opts
[] =
2663 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2664 { "-mfma", OPTION_MASK_ISA_FMA
},
2665 { "-mxop", OPTION_MASK_ISA_XOP
},
2666 { "-mlwp", OPTION_MASK_ISA_LWP
},
2667 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2668 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2669 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2670 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2671 { "-msse3", OPTION_MASK_ISA_SSE3
},
2672 { "-msse2", OPTION_MASK_ISA_SSE2
},
2673 { "-msse", OPTION_MASK_ISA_SSE
},
2674 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2675 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2676 { "-mmmx", OPTION_MASK_ISA_MMX
},
2677 { "-mabm", OPTION_MASK_ISA_ABM
},
2678 { "-mbmi", OPTION_MASK_ISA_BMI
},
2679 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2680 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2681 { "-mhle", OPTION_MASK_ISA_HLE
},
2682 { "-mtbm", OPTION_MASK_ISA_TBM
},
2683 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2684 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2685 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2686 { "-maes", OPTION_MASK_ISA_AES
},
2687 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2688 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2689 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2690 { "-mf16c", OPTION_MASK_ISA_F16C
},
2691 { "-mrtm", OPTION_MASK_ISA_RTM
},
2695 static struct ix86_target_opts flag_opts
[] =
2697 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2698 { "-m80387", MASK_80387
},
2699 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2700 { "-malign-double", MASK_ALIGN_DOUBLE
},
2701 { "-mcld", MASK_CLD
},
2702 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2703 { "-mieee-fp", MASK_IEEE_FP
},
2704 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2705 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2706 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2707 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2708 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2709 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2710 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2711 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2712 { "-mrecip", MASK_RECIP
},
2713 { "-mrtd", MASK_RTD
},
2714 { "-msseregparm", MASK_SSEREGPARM
},
2715 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2716 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2717 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2718 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2719 { "-mvzeroupper", MASK_VZEROUPPER
},
2720 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2721 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2722 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2725 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2728 char target_other
[40];
2738 memset (opts
, '\0', sizeof (opts
));
2740 /* Add -march= option. */
2743 opts
[num
][0] = "-march=";
2744 opts
[num
++][1] = arch
;
2747 /* Add -mtune= option. */
2750 opts
[num
][0] = "-mtune=";
2751 opts
[num
++][1] = tune
;
2754 /* Add -m32/-m64/-mx32. */
2755 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2757 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2761 isa
&= ~ (OPTION_MASK_ISA_64BIT
2762 | OPTION_MASK_ABI_64
2763 | OPTION_MASK_ABI_X32
);
2767 opts
[num
++][0] = abi
;
2769 /* Pick out the options in isa options. */
2770 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2772 if ((isa
& isa_opts
[i
].mask
) != 0)
2774 opts
[num
++][0] = isa_opts
[i
].option
;
2775 isa
&= ~ isa_opts
[i
].mask
;
2779 if (isa
&& add_nl_p
)
2781 opts
[num
++][0] = isa_other
;
2782 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2786 /* Add flag options. */
2787 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2789 if ((flags
& flag_opts
[i
].mask
) != 0)
2791 opts
[num
++][0] = flag_opts
[i
].option
;
2792 flags
&= ~ flag_opts
[i
].mask
;
2796 if (flags
&& add_nl_p
)
2798 opts
[num
++][0] = target_other
;
2799 sprintf (target_other
, "(other flags: %#x)", flags
);
2802 /* Add -fpmath= option. */
2805 opts
[num
][0] = "-mfpmath=";
2806 switch ((int) fpmath
)
2809 opts
[num
++][1] = "387";
2813 opts
[num
++][1] = "sse";
2816 case FPMATH_387
| FPMATH_SSE
:
2817 opts
[num
++][1] = "sse+387";
2829 gcc_assert (num
< ARRAY_SIZE (opts
));
2831 /* Size the string. */
2833 sep_len
= (add_nl_p
) ? 3 : 1;
2834 for (i
= 0; i
< num
; i
++)
2837 for (j
= 0; j
< 2; j
++)
2839 len
+= strlen (opts
[i
][j
]);
2842 /* Build the string. */
2843 ret
= ptr
= (char *) xmalloc (len
);
2846 for (i
= 0; i
< num
; i
++)
2850 for (j
= 0; j
< 2; j
++)
2851 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2858 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2866 for (j
= 0; j
< 2; j
++)
2869 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2871 line_len
+= len2
[j
];
2876 gcc_assert (ret
+ len
>= ptr
);
2881 /* Return true, if profiling code should be emitted before
2882 prologue. Otherwise it returns false.
2883 Note: For x86 with "hotfix" it is sorried. */
/* NOTE(review): this chunk is a garbled extraction -- the original file's
   line numbers (2881, 2885, ...) are fused into the text, single statements
   are split across physical lines, and several source lines are missing
   outright (the embedded numbering jumps 2883 -> 2885 -> 2887, so the
   function's return type and its braces were dropped).  The visible logic
   only tests the -mfentry option: profiling goes before the prologue
   exactly when flag_fentry is nonzero -- presumably returning bool;
   confirm against the unmangled i386.c before relying on this.  */
2885 ix86_profile_before_prologue (void)
2887 return flag_fentry
!= 0;
2890 /* Function that is callable from the debugger to print the current
/* NOTE(review): garbled extraction -- the original comment is truncated
   mid-sentence (lines 2891-2892 are missing), the function's return type,
   braces, and several statements (2894, 2897-2900, 2902-2904) were
   dropped, and each remaining statement is split across physical lines.
   What survives shows the intent: build a human-readable option string
   via ix86_target_string (from ix86_isa_flags, target_flags,
   ix86_arch_string, ix86_tune_string, ...), print it to stderr with
   fprintf when non-NULL, otherwise print "<no options>"; the free() of
   the returned string is among the missing lines -- TODO confirm against
   the unmangled i386.c.  */
2893 ix86_debug_options (void)
2895 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2896 ix86_arch_string
, ix86_tune_string
,
2901 fprintf (stderr
, "%s\n\n", opts
);
2905 fputs ("<no options>\n\n", stderr
);
2910 /* Override various settings based on options. If MAIN_ARGS_P, the
2911 options are from the command line, otherwise they are from
2915 ix86_option_override_internal (bool main_args_p
)
2918 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2919 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2924 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2925 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2926 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2927 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2928 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2929 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2930 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2931 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2932 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2933 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2934 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2935 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2936 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2937 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2938 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2939 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2940 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2941 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2942 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2943 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2944 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2945 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2946 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2947 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2948 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2949 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2950 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2951 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2952 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2953 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2954 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2955 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2956 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2957 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2958 /* if this reaches 64, need to widen struct pta flags below */
2962 const char *const name
; /* processor name or nickname. */
2963 const enum processor_type processor
;
2964 const enum attr_cpu schedule
;
2965 const unsigned HOST_WIDE_INT flags
;
2967 const processor_alias_table
[] =
2969 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2970 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2971 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2972 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2973 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2974 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2975 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2976 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2977 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2978 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2979 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2980 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2981 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2983 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2985 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2986 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2987 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2988 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2989 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2990 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2991 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2992 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2993 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2994 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2995 | PTA_CX16
| PTA_NO_SAHF
},
2996 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2997 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2998 | PTA_SSSE3
| PTA_CX16
},
2999 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3000 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3001 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
3002 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3003 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3004 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3005 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
3006 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
3007 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3008 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3009 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3010 | PTA_RDRND
| PTA_F16C
},
3011 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
3012 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3013 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3014 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3015 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3016 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
},
3017 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3018 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3019 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3020 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3021 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3022 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3023 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3024 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3025 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3026 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3027 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3028 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3029 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3030 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3031 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3032 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3033 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3034 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3035 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3036 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3037 {"k8", PROCESSOR_K8
, CPU_K8
,
3038 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3039 | PTA_SSE2
| PTA_NO_SAHF
},
3040 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3041 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3042 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3043 {"opteron", PROCESSOR_K8
, CPU_K8
,
3044 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3045 | PTA_SSE2
| PTA_NO_SAHF
},
3046 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3047 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3048 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3049 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3050 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3051 | PTA_SSE2
| PTA_NO_SAHF
},
3052 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3053 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3054 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3055 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3056 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3057 | PTA_SSE2
| PTA_NO_SAHF
},
3058 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3059 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3060 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3061 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3062 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3063 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3064 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3065 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3066 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3067 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3068 | PTA_XOP
| PTA_LWP
},
3069 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3070 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3071 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3072 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3073 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3075 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3076 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3077 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3078 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3079 PTA_HLE
/* flags are only used for -march switch. */ },
3080 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3082 | PTA_HLE
/* flags are only used for -march switch. */ },
3085 /* -mrecip options. */
3088 const char *string
; /* option name */
3089 unsigned int mask
; /* mask bits to set */
3091 const recip_options
[] =
3093 { "all", RECIP_MASK_ALL
},
3094 { "none", RECIP_MASK_NONE
},
3095 { "div", RECIP_MASK_DIV
},
3096 { "sqrt", RECIP_MASK_SQRT
},
3097 { "vec-div", RECIP_MASK_VEC_DIV
},
3098 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3101 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3103 /* Set up prefix/suffix so the error messages refer to either the command
3104 line argument, or the attribute(target). */
3113 prefix
= "option(\"";
3118 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3119 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3120 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3121 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3122 #ifdef TARGET_BI_ARCH
3125 #if TARGET_BI_ARCH == 1
3126 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3127 is on and OPTION_MASK_ABI_X32 is off. We turn off
3128 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3131 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3133 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3134 on and OPTION_MASK_ABI_64 is off. We turn off
3135 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3138 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3145 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3146 OPTION_MASK_ABI_64 for TARGET_X32. */
3147 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3148 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3150 else if (TARGET_LP64
)
3152 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3153 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3154 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3155 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3158 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3159 SUBTARGET_OVERRIDE_OPTIONS
;
3162 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3163 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3166 /* -fPIC is the default for x86_64. */
3167 if (TARGET_MACHO
&& TARGET_64BIT
)
3170 /* Need to check -mtune=generic first. */
3171 if (ix86_tune_string
)
3173 if (!strcmp (ix86_tune_string
, "generic")
3174 || !strcmp (ix86_tune_string
, "i686")
3175 /* As special support for cross compilers we read -mtune=native
3176 as -mtune=generic. With native compilers we won't see the
3177 -mtune=native, as it was changed by the driver. */
3178 || !strcmp (ix86_tune_string
, "native"))
3181 ix86_tune_string
= "generic64";
3183 ix86_tune_string
= "generic32";
3185 /* If this call is for setting the option attribute, allow the
3186 generic32/generic64 that was previously set. */
3187 else if (!main_args_p
3188 && (!strcmp (ix86_tune_string
, "generic32")
3189 || !strcmp (ix86_tune_string
, "generic64")))
3191 else if (!strncmp (ix86_tune_string
, "generic", 7))
3192 error ("bad value (%s) for %stune=%s %s",
3193 ix86_tune_string
, prefix
, suffix
, sw
);
3194 else if (!strcmp (ix86_tune_string
, "x86-64"))
3195 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3196 "%stune=k8%s or %stune=generic%s instead as appropriate",
3197 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3201 if (ix86_arch_string
)
3202 ix86_tune_string
= ix86_arch_string
;
3203 if (!ix86_tune_string
)
3205 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3206 ix86_tune_defaulted
= 1;
3209 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3210 need to use a sensible tune option. */
3211 if (!strcmp (ix86_tune_string
, "generic")
3212 || !strcmp (ix86_tune_string
, "x86-64")
3213 || !strcmp (ix86_tune_string
, "i686"))
3216 ix86_tune_string
= "generic64";
3218 ix86_tune_string
= "generic32";
3222 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3224 /* rep; movq isn't available in 32-bit code. */
3225 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3226 ix86_stringop_alg
= no_stringop
;
3229 if (!ix86_arch_string
)
3230 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3232 ix86_arch_specified
= 1;
3234 if (global_options_set
.x_ix86_pmode
)
3236 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3237 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3238 error ("address mode %qs not supported in the %s bit mode",
3239 TARGET_64BIT
? "short" : "long",
3240 TARGET_64BIT
? "64" : "32");
3243 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3245 if (!global_options_set
.x_ix86_abi
)
3246 ix86_abi
= DEFAULT_ABI
;
3248 if (global_options_set
.x_ix86_cmodel
)
3250 switch (ix86_cmodel
)
3255 ix86_cmodel
= CM_SMALL_PIC
;
3257 error ("code model %qs not supported in the %s bit mode",
3264 ix86_cmodel
= CM_MEDIUM_PIC
;
3266 error ("code model %qs not supported in the %s bit mode",
3268 else if (TARGET_X32
)
3269 error ("code model %qs not supported in x32 mode",
3276 ix86_cmodel
= CM_LARGE_PIC
;
3278 error ("code model %qs not supported in the %s bit mode",
3280 else if (TARGET_X32
)
3281 error ("code model %qs not supported in x32 mode",
3287 error ("code model %s does not support PIC mode", "32");
3289 error ("code model %qs not supported in the %s bit mode",
3296 error ("code model %s does not support PIC mode", "kernel");
3297 ix86_cmodel
= CM_32
;
3300 error ("code model %qs not supported in the %s bit mode",
3310 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3311 use of rip-relative addressing. This eliminates fixups that
3312 would otherwise be needed if this object is to be placed in a
3313 DLL, and is essentially just as efficient as direct addressing. */
3314 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3315 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3316 else if (TARGET_64BIT
)
3317 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3319 ix86_cmodel
= CM_32
;
3321 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3323 error ("-masm=intel not supported in this configuration");
3324 ix86_asm_dialect
= ASM_ATT
;
3326 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3327 sorry ("%i-bit mode not compiled in",
3328 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3330 for (i
= 0; i
< pta_size
; i
++)
3331 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3333 ix86_schedule
= processor_alias_table
[i
].schedule
;
3334 ix86_arch
= processor_alias_table
[i
].processor
;
3335 /* Default cpu tuning to the architecture. */
3336 ix86_tune
= ix86_arch
;
3338 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3339 error ("CPU you selected does not support x86-64 "
3342 if (processor_alias_table
[i
].flags
& PTA_MMX
3343 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3344 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3345 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3346 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3347 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3348 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3349 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3350 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3351 if (processor_alias_table
[i
].flags
& PTA_SSE
3352 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3353 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3354 if (processor_alias_table
[i
].flags
& PTA_SSE2
3355 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3356 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3357 if (processor_alias_table
[i
].flags
& PTA_SSE3
3358 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3359 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3360 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3361 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3362 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3363 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3364 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3365 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3366 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3367 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3368 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3369 if (processor_alias_table
[i
].flags
& PTA_AVX
3370 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3371 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3372 if (processor_alias_table
[i
].flags
& PTA_AVX2
3373 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3374 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3375 if (processor_alias_table
[i
].flags
& PTA_FMA
3376 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3377 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3378 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3379 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3380 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3381 if (processor_alias_table
[i
].flags
& PTA_FMA4
3382 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3383 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3384 if (processor_alias_table
[i
].flags
& PTA_XOP
3385 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3386 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3387 if (processor_alias_table
[i
].flags
& PTA_LWP
3388 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3389 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3390 if (processor_alias_table
[i
].flags
& PTA_ABM
3391 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3392 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3393 if (processor_alias_table
[i
].flags
& PTA_BMI
3394 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3395 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3396 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3397 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3398 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3399 if (processor_alias_table
[i
].flags
& PTA_TBM
3400 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3401 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3402 if (processor_alias_table
[i
].flags
& PTA_BMI2
3403 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3404 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3405 if (processor_alias_table
[i
].flags
& PTA_CX16
3406 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3407 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3408 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3409 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3410 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3411 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3412 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3413 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3414 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3415 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3416 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3417 if (processor_alias_table
[i
].flags
& PTA_AES
3418 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3419 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3420 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3421 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3422 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3423 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3424 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3425 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3426 if (processor_alias_table
[i
].flags
& PTA_RDRND
3427 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3428 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3429 if (processor_alias_table
[i
].flags
& PTA_F16C
3430 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3431 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3432 if (processor_alias_table
[i
].flags
& PTA_RTM
3433 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3434 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3435 if (processor_alias_table
[i
].flags
& PTA_HLE
3436 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3437 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3438 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3439 x86_prefetch_sse
= true;
3444 if (!strcmp (ix86_arch_string
, "generic"))
3445 error ("generic CPU can be used only for %stune=%s %s",
3446 prefix
, suffix
, sw
);
3447 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3448 error ("bad value (%s) for %sarch=%s %s",
3449 ix86_arch_string
, prefix
, suffix
, sw
);
3451 ix86_arch_mask
= 1u << ix86_arch
;
3452 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3453 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3455 for (i
= 0; i
< pta_size
; i
++)
3456 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3458 ix86_schedule
= processor_alias_table
[i
].schedule
;
3459 ix86_tune
= processor_alias_table
[i
].processor
;
3462 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3464 if (ix86_tune_defaulted
)
3466 ix86_tune_string
= "x86-64";
3467 for (i
= 0; i
< pta_size
; i
++)
3468 if (! strcmp (ix86_tune_string
,
3469 processor_alias_table
[i
].name
))
3471 ix86_schedule
= processor_alias_table
[i
].schedule
;
3472 ix86_tune
= processor_alias_table
[i
].processor
;
3475 error ("CPU you selected does not support x86-64 "
3481 /* Adjust tuning when compiling for 32-bit ABI. */
3484 case PROCESSOR_GENERIC64
:
3485 ix86_tune
= PROCESSOR_GENERIC32
;
3486 ix86_schedule
= CPU_PENTIUMPRO
;
3489 case PROCESSOR_CORE2_64
:
3490 ix86_tune
= PROCESSOR_CORE2_32
;
3493 case PROCESSOR_COREI7_64
:
3494 ix86_tune
= PROCESSOR_COREI7_32
;
3501 /* Intel CPUs have always interpreted SSE prefetch instructions as
3502 NOPs; so, we can enable SSE prefetch instructions even when
3503 -mtune (rather than -march) points us to a processor that has them.
3504 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3505 higher processors. */
3507 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3508 x86_prefetch_sse
= true;
3512 if (ix86_tune_specified
&& i
== pta_size
)
3513 error ("bad value (%s) for %stune=%s %s",
3514 ix86_tune_string
, prefix
, suffix
, sw
);
3516 ix86_tune_mask
= 1u << ix86_tune
;
3517 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3518 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3520 #ifndef USE_IX86_FRAME_POINTER
3521 #define USE_IX86_FRAME_POINTER 0
3524 #ifndef USE_X86_64_FRAME_POINTER
3525 #define USE_X86_64_FRAME_POINTER 0
3528 /* Set the default values for switches whose default depends on TARGET_64BIT
3529 in case they weren't overwritten by command line options. */
3532 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3533 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3534 if (flag_asynchronous_unwind_tables
== 2)
3535 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3536 if (flag_pcc_struct_return
== 2)
3537 flag_pcc_struct_return
= 0;
3541 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3542 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3543 if (flag_asynchronous_unwind_tables
== 2)
3544 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3545 if (flag_pcc_struct_return
== 2)
3546 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3550 ix86_cost
= &ix86_size_cost
;
3552 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
3554 /* Arrange to set up i386_stack_locals for all functions. */
3555 init_machine_status
= ix86_init_machine_status
;
3557 /* Validate -mregparm= value. */
3558 if (global_options_set
.x_ix86_regparm
)
3561 warning (0, "-mregparm is ignored in 64-bit mode");
3562 if (ix86_regparm
> REGPARM_MAX
)
3564 error ("-mregparm=%d is not between 0 and %d",
3565 ix86_regparm
, REGPARM_MAX
);
3570 ix86_regparm
= REGPARM_MAX
;
3572 /* Default align_* from the processor table. */
3573 if (align_loops
== 0)
3575 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3576 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3578 if (align_jumps
== 0)
3580 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3581 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3583 if (align_functions
== 0)
3585 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3588 /* Provide default for -mbranch-cost= value. */
3589 if (!global_options_set
.x_ix86_branch_cost
)
3590 ix86_branch_cost
= ix86_cost
->branch_cost
;
3594 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3596 /* Enable by default the SSE and MMX builtins. Do allow the user to
3597 explicitly disable any of these. In particular, disabling SSE and
3598 MMX for kernel code is extremely useful. */
3599 if (!ix86_arch_specified
)
3601 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3602 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3605 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3609 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3611 if (!ix86_arch_specified
)
3613 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3615 /* i386 ABI does not specify red zone. It still makes sense to use it
3616 when programmer takes care to stack from being destroyed. */
3617 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3618 target_flags
|= MASK_NO_RED_ZONE
;
3621 /* Keep nonleaf frame pointers. */
3622 if (flag_omit_frame_pointer
)
3623 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3624 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3625 flag_omit_frame_pointer
= 1;
3627 /* If we're doing fast math, we don't care about comparison order
3628 wrt NaNs. This lets us use a shorter comparison sequence. */
3629 if (flag_finite_math_only
)
3630 target_flags
&= ~MASK_IEEE_FP
;
3632 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3633 since the insns won't need emulation. */
3634 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3635 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3637 /* Likewise, if the target doesn't have a 387, or we've specified
3638 software floating point, don't use 387 inline intrinsics. */
3640 target_flags
|= MASK_NO_FANCY_MATH_387
;
3642 /* Turn on MMX builtins for -msse. */
3645 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3646 x86_prefetch_sse
= true;
3649 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3650 if (TARGET_SSE4_2
|| TARGET_ABM
)
3651 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3653 /* Turn on lzcnt instruction for -mabm. */
3655 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3657 /* Validate -mpreferred-stack-boundary= value or default it to
3658 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3659 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3660 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3662 int min
= (TARGET_64BIT
? 4 : 2);
3663 int max
= (TARGET_SEH
? 4 : 12);
3665 if (ix86_preferred_stack_boundary_arg
< min
3666 || ix86_preferred_stack_boundary_arg
> max
)
3669 error ("-mpreferred-stack-boundary is not supported "
3672 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3673 ix86_preferred_stack_boundary_arg
, min
, max
);
3676 ix86_preferred_stack_boundary
3677 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3680 /* Set the default value for -mstackrealign. */
3681 if (ix86_force_align_arg_pointer
== -1)
3682 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3684 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3686 /* Validate -mincoming-stack-boundary= value or default it to
3687 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3688 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3689 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3691 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3692 || ix86_incoming_stack_boundary_arg
> 12)
3693 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3694 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3697 ix86_user_incoming_stack_boundary
3698 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3699 ix86_incoming_stack_boundary
3700 = ix86_user_incoming_stack_boundary
;
3704 /* Accept -msseregparm only if at least SSE support is enabled. */
3705 if (TARGET_SSEREGPARM
3707 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3709 if (global_options_set
.x_ix86_fpmath
)
3711 if (ix86_fpmath
& FPMATH_SSE
)
3715 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3716 ix86_fpmath
= FPMATH_387
;
3718 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3720 warning (0, "387 instruction set disabled, using SSE arithmetics");
3721 ix86_fpmath
= FPMATH_SSE
;
3726 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3728 /* If the i387 is disabled, then do not return values in it. */
3730 target_flags
&= ~MASK_FLOAT_RETURNS
;
3732 /* Use external vectorized library in vectorizing intrinsics. */
3733 if (global_options_set
.x_ix86_veclibabi_type
)
3734 switch (ix86_veclibabi_type
)
3736 case ix86_veclibabi_type_svml
:
3737 ix86_veclib_handler
= ix86_veclibabi_svml
;
3740 case ix86_veclibabi_type_acml
:
3741 ix86_veclib_handler
= ix86_veclibabi_acml
;
3748 if ((!USE_IX86_FRAME_POINTER
3749 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3750 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3752 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3754 /* ??? Unwind info is not correct around the CFG unless either a frame
3755 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3756 unwind info generation to be aware of the CFG and propagating states
3758 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3759 || flag_exceptions
|| flag_non_call_exceptions
)
3760 && flag_omit_frame_pointer
3761 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3763 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3764 warning (0, "unwind tables currently require either a frame pointer "
3765 "or %saccumulate-outgoing-args%s for correctness",
3767 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3770 /* If stack probes are required, the space used for large function
3771 arguments on the stack must also be probed, so enable
3772 -maccumulate-outgoing-args so this happens in the prologue. */
3773 if (TARGET_STACK_PROBE
3774 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3776 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3777 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3778 "for correctness", prefix
, suffix
);
3779 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3782 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3785 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3786 p
= strchr (internal_label_prefix
, 'X');
3787 internal_label_prefix_len
= p
- internal_label_prefix
;
3791 /* When scheduling description is not available, disable scheduler pass
3792 so it won't slow down the compilation and make x87 code slower. */
3793 if (!TARGET_SCHEDULE
)
3794 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3796 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3797 ix86_cost
->simultaneous_prefetches
,
3798 global_options
.x_param_values
,
3799 global_options_set
.x_param_values
);
3800 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
, ix86_cost
->prefetch_block
,
3801 global_options
.x_param_values
,
3802 global_options_set
.x_param_values
);
3803 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, ix86_cost
->l1_cache_size
,
3804 global_options
.x_param_values
,
3805 global_options_set
.x_param_values
);
3806 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, ix86_cost
->l2_cache_size
,
3807 global_options
.x_param_values
,
3808 global_options_set
.x_param_values
);
3810 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3811 if (flag_prefetch_loop_arrays
< 0
3814 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3815 flag_prefetch_loop_arrays
= 1;
3817 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3818 can be optimized to ap = __builtin_next_arg (0). */
3819 if (!TARGET_64BIT
&& !flag_split_stack
)
3820 targetm
.expand_builtin_va_start
= NULL
;
3824 ix86_gen_leave
= gen_leave_rex64
;
3825 if (Pmode
== DImode
)
3827 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3828 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3829 ix86_gen_tls_local_dynamic_base_64
3830 = gen_tls_local_dynamic_base_64_di
;
3834 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3835 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3836 ix86_gen_tls_local_dynamic_base_64
3837 = gen_tls_local_dynamic_base_64_si
;
3842 ix86_gen_leave
= gen_leave
;
3843 ix86_gen_monitor
= gen_sse3_monitor
;
3846 if (Pmode
== DImode
)
3848 ix86_gen_add3
= gen_adddi3
;
3849 ix86_gen_sub3
= gen_subdi3
;
3850 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3851 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3852 ix86_gen_andsp
= gen_anddi3
;
3853 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3854 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3855 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3859 ix86_gen_add3
= gen_addsi3
;
3860 ix86_gen_sub3
= gen_subsi3
;
3861 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3862 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3863 ix86_gen_andsp
= gen_andsi3
;
3864 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3865 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3866 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3870 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3872 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3875 if (!TARGET_64BIT
&& flag_pic
)
3877 if (flag_fentry
> 0)
3878 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3882 else if (TARGET_SEH
)
3884 if (flag_fentry
== 0)
3885 sorry ("-mno-fentry isn%'t compatible with SEH");
3888 else if (flag_fentry
< 0)
3890 #if defined(PROFILE_BEFORE_PROLOGUE)
3899 /* When not optimize for size, enable vzeroupper optimization for
3900 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3901 AVX unaligned load/store. */
3904 if (flag_expensive_optimizations
3905 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3906 target_flags
|= MASK_VZEROUPPER
;
3907 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3908 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3909 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3910 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3911 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3912 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3913 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3914 if (TARGET_AVX128_OPTIMAL
&& !(target_flags_explicit
& MASK_PREFER_AVX128
))
3915 target_flags
|= MASK_PREFER_AVX128
;
3920 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3921 target_flags
&= ~MASK_VZEROUPPER
;
3924 if (ix86_recip_name
)
3926 char *p
= ASTRDUP (ix86_recip_name
);
3928 unsigned int mask
, i
;
3931 while ((q
= strtok (p
, ",")) != NULL
)
3942 if (!strcmp (q
, "default"))
3943 mask
= RECIP_MASK_ALL
;
3946 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3947 if (!strcmp (q
, recip_options
[i
].string
))
3949 mask
= recip_options
[i
].mask
;
3953 if (i
== ARRAY_SIZE (recip_options
))
3955 error ("unknown option for -mrecip=%s", q
);
3957 mask
= RECIP_MASK_NONE
;
3961 recip_mask_explicit
|= mask
;
3963 recip_mask
&= ~mask
;
3970 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3971 else if (target_flags_explicit
& MASK_RECIP
)
3972 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3974 /* Save the initial options in case the user does function specific
3977 target_option_default_node
= target_option_current_node
3978 = build_target_option_node ();
3981 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3984 function_pass_avx256_p (const_rtx val
)
3989 if (REG_P (val
) && VALID_AVX256_REG_MODE (GET_MODE (val
)))
3992 if (GET_CODE (val
) == PARALLEL
)
3997 for (i
= XVECLEN (val
, 0) - 1; i
>= 0; i
--)
3999 r
= XVECEXP (val
, 0, i
);
4000 if (GET_CODE (r
) == EXPR_LIST
4002 && REG_P (XEXP (r
, 0))
4003 && (GET_MODE (XEXP (r
, 0)) == OImode
4004 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r
, 0)))))
4012 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4015 ix86_option_override (void)
4017 ix86_option_override_internal (true);
4020 /* Update register usage after having seen the compiler flags. */
4023 ix86_conditional_register_usage (void)
4028 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4030 if (fixed_regs
[i
] > 1)
4031 fixed_regs
[i
] = (fixed_regs
[i
] == (TARGET_64BIT
? 3 : 2));
4032 if (call_used_regs
[i
] > 1)
4033 call_used_regs
[i
] = (call_used_regs
[i
] == (TARGET_64BIT
? 3 : 2));
4036 /* The PIC register, if it exists, is fixed. */
4037 j
= PIC_OFFSET_TABLE_REGNUM
;
4038 if (j
!= INVALID_REGNUM
)
4039 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4041 /* The 64-bit MS_ABI changes the set of call-used registers. */
4042 if (TARGET_64BIT_MS_ABI
)
4044 call_used_regs
[SI_REG
] = 0;
4045 call_used_regs
[DI_REG
] = 0;
4046 call_used_regs
[XMM6_REG
] = 0;
4047 call_used_regs
[XMM7_REG
] = 0;
4048 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4049 call_used_regs
[i
] = 0;
4052 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4053 other call-clobbered regs for 64-bit. */
4056 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4058 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4059 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4060 && call_used_regs
[i
])
4061 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4064 /* If MMX is disabled, squash the registers. */
4066 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4067 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4068 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4070 /* If SSE is disabled, squash the registers. */
4072 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4073 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4074 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4076 /* If the FPU is disabled, squash the registers. */
4077 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4078 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4079 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4080 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4082 /* If 32-bit, squash the 64-bit registers. */
4085 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4087 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4093 /* Save the current options */
4096 ix86_function_specific_save (struct cl_target_option
*ptr
)
4098 ptr
->arch
= ix86_arch
;
4099 ptr
->schedule
= ix86_schedule
;
4100 ptr
->tune
= ix86_tune
;
4101 ptr
->branch_cost
= ix86_branch_cost
;
4102 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4103 ptr
->arch_specified
= ix86_arch_specified
;
4104 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4105 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4106 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4108 /* The fields are char but the variables are not; make sure the
4109 values fit in the fields. */
4110 gcc_assert (ptr
->arch
== ix86_arch
);
4111 gcc_assert (ptr
->schedule
== ix86_schedule
);
4112 gcc_assert (ptr
->tune
== ix86_tune
);
4113 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4116 /* Restore the current options */
4119 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4121 enum processor_type old_tune
= ix86_tune
;
4122 enum processor_type old_arch
= ix86_arch
;
4123 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4126 ix86_arch
= (enum processor_type
) ptr
->arch
;
4127 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4128 ix86_tune
= (enum processor_type
) ptr
->tune
;
4129 ix86_branch_cost
= ptr
->branch_cost
;
4130 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4131 ix86_arch_specified
= ptr
->arch_specified
;
4132 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4133 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4134 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4136 /* Recreate the arch feature tests if the arch changed */
4137 if (old_arch
!= ix86_arch
)
4139 ix86_arch_mask
= 1u << ix86_arch
;
4140 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4141 ix86_arch_features
[i
]
4142 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4145 /* Recreate the tune optimization tests */
4146 if (old_tune
!= ix86_tune
)
4148 ix86_tune_mask
= 1u << ix86_tune
;
4149 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4150 ix86_tune_features
[i
]
4151 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4155 /* Print the current options */
4158 ix86_function_specific_print (FILE *file
, int indent
,
4159 struct cl_target_option
*ptr
)
4162 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4163 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4165 fprintf (file
, "%*sarch = %d (%s)\n",
4168 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4169 ? cpu_names
[ptr
->arch
]
4172 fprintf (file
, "%*stune = %d (%s)\n",
4175 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4176 ? cpu_names
[ptr
->tune
]
4179 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4183 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4184 free (target_string
);
4189 /* Inner function to process the attribute((target(...))), take an argument and
4190 set the current options from the argument. If we have a list, recursively go
4194 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4195 struct gcc_options
*enum_opts_set
)
4200 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4201 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4202 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4203 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4204 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4220 enum ix86_opt_type type
;
4225 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4226 IX86_ATTR_ISA ("abm", OPT_mabm
),
4227 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4228 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4229 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4230 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4231 IX86_ATTR_ISA ("aes", OPT_maes
),
4232 IX86_ATTR_ISA ("avx", OPT_mavx
),
4233 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4234 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4235 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4236 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4237 IX86_ATTR_ISA ("sse", OPT_msse
),
4238 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4239 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4240 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4241 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4242 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4243 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4244 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4245 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4246 IX86_ATTR_ISA ("fma", OPT_mfma
),
4247 IX86_ATTR_ISA ("xop", OPT_mxop
),
4248 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4249 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4250 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4251 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4252 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4253 IX86_ATTR_ISA ("hle", OPT_mhle
),
4256 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4258 /* string options */
4259 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4260 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4263 IX86_ATTR_YES ("cld",
4267 IX86_ATTR_NO ("fancy-math-387",
4268 OPT_mfancy_math_387
,
4269 MASK_NO_FANCY_MATH_387
),
4271 IX86_ATTR_YES ("ieee-fp",
4275 IX86_ATTR_YES ("inline-all-stringops",
4276 OPT_minline_all_stringops
,
4277 MASK_INLINE_ALL_STRINGOPS
),
4279 IX86_ATTR_YES ("inline-stringops-dynamically",
4280 OPT_minline_stringops_dynamically
,
4281 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4283 IX86_ATTR_NO ("align-stringops",
4284 OPT_mno_align_stringops
,
4285 MASK_NO_ALIGN_STRINGOPS
),
4287 IX86_ATTR_YES ("recip",
4293 /* If this is a list, recurse to get the options. */
4294 if (TREE_CODE (args
) == TREE_LIST
)
4298 for (; args
; args
= TREE_CHAIN (args
))
4299 if (TREE_VALUE (args
)
4300 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4301 p_strings
, enum_opts_set
))
4307 else if (TREE_CODE (args
) != STRING_CST
)
4310 /* Handle multiple arguments separated by commas. */
4311 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4313 while (next_optstr
&& *next_optstr
!= '\0')
4315 char *p
= next_optstr
;
4317 char *comma
= strchr (next_optstr
, ',');
4318 const char *opt_string
;
4319 size_t len
, opt_len
;
4324 enum ix86_opt_type type
= ix86_opt_unknown
;
4330 len
= comma
- next_optstr
;
4331 next_optstr
= comma
+ 1;
4339 /* Recognize no-xxx. */
4340 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4349 /* Find the option. */
4352 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4354 type
= attrs
[i
].type
;
4355 opt_len
= attrs
[i
].len
;
4356 if (ch
== attrs
[i
].string
[0]
4357 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4360 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4363 mask
= attrs
[i
].mask
;
4364 opt_string
= attrs
[i
].string
;
4369 /* Process the option. */
4372 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4376 else if (type
== ix86_opt_isa
)
4378 struct cl_decoded_option decoded
;
4380 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4381 ix86_handle_option (&global_options
, &global_options_set
,
4382 &decoded
, input_location
);
4385 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4387 if (type
== ix86_opt_no
)
4388 opt_set_p
= !opt_set_p
;
4391 target_flags
|= mask
;
4393 target_flags
&= ~mask
;
4396 else if (type
== ix86_opt_str
)
4400 error ("option(\"%s\") was already specified", opt_string
);
4404 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4407 else if (type
== ix86_opt_enum
)
4412 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4414 set_option (&global_options
, enum_opts_set
, opt
, value
,
4415 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4419 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4431 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4434 ix86_valid_target_attribute_tree (tree args
)
4436 const char *orig_arch_string
= ix86_arch_string
;
4437 const char *orig_tune_string
= ix86_tune_string
;
4438 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4439 int orig_tune_defaulted
= ix86_tune_defaulted
;
4440 int orig_arch_specified
= ix86_arch_specified
;
4441 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4444 struct cl_target_option
*def
4445 = TREE_TARGET_OPTION (target_option_default_node
);
4446 struct gcc_options enum_opts_set
;
4448 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4450 /* Process each of the options on the chain. */
4451 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4455 /* If the changed options are different from the default, rerun
4456 ix86_option_override_internal, and then save the options away.
4457 The string options are are attribute options, and will be undone
4458 when we copy the save structure. */
4459 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4460 || target_flags
!= def
->x_target_flags
4461 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4462 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4463 || enum_opts_set
.x_ix86_fpmath
)
4465 /* If we are using the default tune= or arch=, undo the string assigned,
4466 and use the default. */
4467 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4468 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4469 else if (!orig_arch_specified
)
4470 ix86_arch_string
= NULL
;
4472 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4473 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4474 else if (orig_tune_defaulted
)
4475 ix86_tune_string
= NULL
;
4477 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4478 if (enum_opts_set
.x_ix86_fpmath
)
4479 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4480 else if (!TARGET_64BIT
&& TARGET_SSE
)
4482 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4483 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4486 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4487 ix86_option_override_internal (false);
4489 /* Add any builtin functions with the new isa if any. */
4490 ix86_add_new_builtins (ix86_isa_flags
);
4492 /* Save the current options unless we are validating options for
4494 t
= build_target_option_node ();
4496 ix86_arch_string
= orig_arch_string
;
4497 ix86_tune_string
= orig_tune_string
;
4498 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4500 /* Free up memory allocated to hold the strings */
4501 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4502 free (option_strings
[i
]);
4508 /* Hook to validate attribute((target("string"))). */
4511 ix86_valid_target_attribute_p (tree fndecl
,
4512 tree
ARG_UNUSED (name
),
4514 int ARG_UNUSED (flags
))
4516 struct cl_target_option cur_target
;
4518 tree old_optimize
= build_optimization_node ();
4519 tree new_target
, new_optimize
;
4520 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4522 /* If the function changed the optimization levels as well as setting target
4523 options, start with the optimizations specified. */
4524 if (func_optimize
&& func_optimize
!= old_optimize
)
4525 cl_optimization_restore (&global_options
,
4526 TREE_OPTIMIZATION (func_optimize
));
4528 /* The target attributes may also change some optimization flags, so update
4529 the optimization options if necessary. */
4530 cl_target_option_save (&cur_target
, &global_options
);
4531 new_target
= ix86_valid_target_attribute_tree (args
);
4532 new_optimize
= build_optimization_node ();
4539 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4541 if (old_optimize
!= new_optimize
)
4542 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4545 cl_target_option_restore (&global_options
, &cur_target
);
4547 if (old_optimize
!= new_optimize
)
4548 cl_optimization_restore (&global_options
,
4549 TREE_OPTIMIZATION (old_optimize
));
4555 /* Hook to determine if one function can safely inline another. */
4558 ix86_can_inline_p (tree caller
, tree callee
)
4561 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4562 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4564 /* If callee has no option attributes, then it is ok to inline. */
4568 /* If caller has no option attributes, but callee does then it is not ok to
4570 else if (!caller_tree
)
4575 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4576 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4578 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4579 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4581 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4582 != callee_opts
->x_ix86_isa_flags
)
4585 /* See if we have the same non-isa options. */
4586 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4589 /* See if arch, tune, etc. are the same. */
4590 else if (caller_opts
->arch
!= callee_opts
->arch
)
4593 else if (caller_opts
->tune
!= callee_opts
->tune
)
4596 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4599 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4610 /* Remember the last target of ix86_set_current_function. */
4611 static GTY(()) tree ix86_previous_fndecl
;
4613 /* Establish appropriate back-end context for processing the function
4614 FNDECL. The argument might be NULL to indicate processing at top
4615 level, outside of any function scope. */
4617 ix86_set_current_function (tree fndecl
)
4619 /* Only change the context if the function changes. This hook is called
4620 several times in the course of compiling a function, and we don't want to
4621 slow things down too much or call target_reinit when it isn't safe. */
4622 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4624 tree old_tree
= (ix86_previous_fndecl
4625 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4628 tree new_tree
= (fndecl
4629 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4632 ix86_previous_fndecl
= fndecl
;
4633 if (old_tree
== new_tree
)
4638 cl_target_option_restore (&global_options
,
4639 TREE_TARGET_OPTION (new_tree
));
4645 struct cl_target_option
*def
4646 = TREE_TARGET_OPTION (target_option_current_node
);
4648 cl_target_option_restore (&global_options
, def
);
4655 /* Return true if this goes in large data/bss. */
4658 ix86_in_large_data_p (tree exp
)
4660 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4663 /* Functions are never large data. */
4664 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4667 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4669 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4670 if (strcmp (section
, ".ldata") == 0
4671 || strcmp (section
, ".lbss") == 0)
4677 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4679 /* If this is an incomplete type with size 0, then we can't put it
4680 in data because it might be too big when completed. */
4681 if (!size
|| size
> ix86_section_threshold
)
4688 /* Switch to the appropriate section for output of DECL.
4689 DECL is either a `VAR_DECL' node or a constant of some sort.
4690 RELOC indicates whether forming the initial value of DECL requires
4691 link-time relocations. */
4693 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4697 x86_64_elf_select_section (tree decl
, int reloc
,
4698 unsigned HOST_WIDE_INT align
)
4700 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4701 && ix86_in_large_data_p (decl
))
4703 const char *sname
= NULL
;
4704 unsigned int flags
= SECTION_WRITE
;
4705 switch (categorize_decl_for_section (decl
, reloc
))
4710 case SECCAT_DATA_REL
:
4711 sname
= ".ldata.rel";
4713 case SECCAT_DATA_REL_LOCAL
:
4714 sname
= ".ldata.rel.local";
4716 case SECCAT_DATA_REL_RO
:
4717 sname
= ".ldata.rel.ro";
4719 case SECCAT_DATA_REL_RO_LOCAL
:
4720 sname
= ".ldata.rel.ro.local";
4724 flags
|= SECTION_BSS
;
4727 case SECCAT_RODATA_MERGE_STR
:
4728 case SECCAT_RODATA_MERGE_STR_INIT
:
4729 case SECCAT_RODATA_MERGE_CONST
:
4733 case SECCAT_SRODATA
:
4740 /* We don't split these for medium model. Place them into
4741 default sections and hope for best. */
4746 /* We might get called with string constants, but get_named_section
4747 doesn't like them as they are not DECLs. Also, we need to set
4748 flags in that case. */
4750 return get_section (sname
, flags
, NULL
);
4751 return get_named_section (decl
, sname
, reloc
);
4754 return default_elf_select_section (decl
, reloc
, align
);
4757 /* Build up a unique section name, expressed as a
4758 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4759 RELOC indicates whether the initial value of EXP requires
4760 link-time relocations. */
4762 static void ATTRIBUTE_UNUSED
4763 x86_64_elf_unique_section (tree decl
, int reloc
)
4765 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4766 && ix86_in_large_data_p (decl
))
4768 const char *prefix
= NULL
;
4769 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4770 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4772 switch (categorize_decl_for_section (decl
, reloc
))
4775 case SECCAT_DATA_REL
:
4776 case SECCAT_DATA_REL_LOCAL
:
4777 case SECCAT_DATA_REL_RO
:
4778 case SECCAT_DATA_REL_RO_LOCAL
:
4779 prefix
= one_only
? ".ld" : ".ldata";
4782 prefix
= one_only
? ".lb" : ".lbss";
4785 case SECCAT_RODATA_MERGE_STR
:
4786 case SECCAT_RODATA_MERGE_STR_INIT
:
4787 case SECCAT_RODATA_MERGE_CONST
:
4788 prefix
= one_only
? ".lr" : ".lrodata";
4790 case SECCAT_SRODATA
:
4797 /* We don't split these for medium model. Place them into
4798 default sections and hope for best. */
4803 const char *name
, *linkonce
;
4806 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4807 name
= targetm
.strip_name_encoding (name
);
4809 /* If we're using one_only, then there needs to be a .gnu.linkonce
4810 prefix to the section name. */
4811 linkonce
= one_only
? ".gnu.linkonce" : "";
4813 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4815 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4819 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects (those above ix86_section_threshold).  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4844 /* Utility function for targets to use in implementing
4845 ASM_OUTPUT_ALIGNED_BSS. */
4848 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4849 const char *name
, unsigned HOST_WIDE_INT size
,
4852 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4853 && size
> (unsigned int)ix86_section_threshold
)
4854 switch_to_section (get_named_section (decl
, ".lbss", 0));
4856 switch_to_section (bss_section
);
4857 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4858 #ifdef ASM_DECLARE_OBJECT_NAME
4859 last_assemble_variable_decl
= decl
;
4860 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4862 /* Standard thing is just output label for the object. */
4863 ASM_OUTPUT_LABEL (file
, name
);
4864 #endif /* ASM_DECLARE_OBJECT_NAME */
4865 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4868 /* Decide whether we must probe the stack before any space allocation
4869 on this target. It's essentially TARGET_STACK_PROBE except when
4870 -fstack-check causes the stack to be already probed differently. */
4873 ix86_target_stack_probe (void)
4875 /* Do not probe the stack twice if static stack checking is enabled. */
4876 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4879 return TARGET_STACK_PROBE
;
4882 /* Decide whether we can make a sibling call to a function. DECL is the
4883 declaration of the function being targeted by the call and EXP is the
4884 CALL_EXPR representing the call. */
4887 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4889 tree type
, decl_or_type
;
4892 /* If we are generating position-independent code, we cannot sibcall
4893 optimize any indirect call, or a direct call to a global function,
4894 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4898 && (!decl
|| !targetm
.binds_local_p (decl
)))
4901 /* If we need to align the outgoing stack, then sibcalling would
4902 unalign the stack, which may break the called function. */
4903 if (ix86_minimum_incoming_stack_boundary (true)
4904 < PREFERRED_STACK_BOUNDARY
)
4909 decl_or_type
= decl
;
4910 type
= TREE_TYPE (decl
);
4914 /* We're looking at the CALL_EXPR, we need the type of the function. */
4915 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4916 type
= TREE_TYPE (type
); /* pointer type */
4917 type
= TREE_TYPE (type
); /* function type */
4918 decl_or_type
= type
;
4921 /* Check that the return value locations are the same. Like
4922 if we are returning floats on the 80387 register stack, we cannot
4923 make a sibcall from a function that doesn't return a float to a
4924 function that does or, conversely, from a function that does return
4925 a float to a function that doesn't; the necessary stack adjustment
4926 would not be executed. This is also the place we notice
4927 differences in the return value ABI. Note that it is ok for one
4928 of the functions to have void return type as long as the return
4929 value of the other is passed in a register. */
4930 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4931 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4933 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4935 if (!rtx_equal_p (a
, b
))
4938 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4940 /* Disable sibcall if we need to generate vzeroupper after
4942 if (TARGET_VZEROUPPER
4943 && cfun
->machine
->callee_return_avx256_p
4944 && !cfun
->machine
->caller_return_avx256_p
)
4947 else if (!rtx_equal_p (a
, b
))
4952 /* The SYSV ABI has more call-clobbered registers;
4953 disallow sibcalls from MS to SYSV. */
4954 if (cfun
->machine
->call_abi
== MS_ABI
4955 && ix86_function_type_abi (type
) == SYSV_ABI
)
4960 /* If this call is indirect, we'll need to be able to use a
4961 call-clobbered register for the address of the target function.
4962 Make sure that all such registers are not used for passing
4963 parameters. Note that DLLIMPORT functions are indirect. */
4965 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4967 if (ix86_function_regparm (type
, NULL
) >= 3)
4969 /* ??? Need to count the actual number of registers to be used,
4970 not the possible number of registers. Fix later. */
4976 /* Otherwise okay. That also includes certain types of indirect calls. */
4980 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4981 and "sseregparm" calling convention attributes;
4982 arguments as in struct attribute_spec.handler. */
4985 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4987 int flags ATTRIBUTE_UNUSED
,
4990 if (TREE_CODE (*node
) != FUNCTION_TYPE
4991 && TREE_CODE (*node
) != METHOD_TYPE
4992 && TREE_CODE (*node
) != FIELD_DECL
4993 && TREE_CODE (*node
) != TYPE_DECL
)
4995 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4997 *no_add_attrs
= true;
5001 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5002 if (is_attribute_p ("regparm", name
))
5006 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5008 error ("fastcall and regparm attributes are not compatible");
5011 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5013 error ("regparam and thiscall attributes are not compatible");
5016 cst
= TREE_VALUE (args
);
5017 if (TREE_CODE (cst
) != INTEGER_CST
)
5019 warning (OPT_Wattributes
,
5020 "%qE attribute requires an integer constant argument",
5022 *no_add_attrs
= true;
5024 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5026 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5028 *no_add_attrs
= true;
5036 /* Do not warn when emulating the MS ABI. */
5037 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5038 && TREE_CODE (*node
) != METHOD_TYPE
)
5039 || ix86_function_type_abi (*node
) != MS_ABI
)
5040 warning (OPT_Wattributes
, "%qE attribute ignored",
5042 *no_add_attrs
= true;
5046 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5047 if (is_attribute_p ("fastcall", name
))
5049 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5051 error ("fastcall and cdecl attributes are not compatible");
5053 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5055 error ("fastcall and stdcall attributes are not compatible");
5057 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5059 error ("fastcall and regparm attributes are not compatible");
5061 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5063 error ("fastcall and thiscall attributes are not compatible");
5067 /* Can combine stdcall with fastcall (redundant), regparm and
5069 else if (is_attribute_p ("stdcall", name
))
5071 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5073 error ("stdcall and cdecl attributes are not compatible");
5075 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5077 error ("stdcall and fastcall attributes are not compatible");
5079 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5081 error ("stdcall and thiscall attributes are not compatible");
5085 /* Can combine cdecl with regparm and sseregparm. */
5086 else if (is_attribute_p ("cdecl", name
))
5088 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5090 error ("stdcall and cdecl attributes are not compatible");
5092 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5094 error ("fastcall and cdecl attributes are not compatible");
5096 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5098 error ("cdecl and thiscall attributes are not compatible");
5101 else if (is_attribute_p ("thiscall", name
))
5103 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5104 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5106 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5108 error ("stdcall and thiscall attributes are not compatible");
5110 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5112 error ("fastcall and thiscall attributes are not compatible");
5114 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5116 error ("cdecl and thiscall attributes are not compatible");
5120 /* Can combine sseregparm with all attributes. */
5125 /* The transactional memory builtins are implicitly regparm or fastcall
5126 depending on the ABI. Override the generic do-nothing attribute that
5127 these builtins were declared with, and replace it with one of the two
5128 attributes that we expect elsewhere. */
5131 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5132 tree args ATTRIBUTE_UNUSED
,
5133 int flags ATTRIBUTE_UNUSED
,
5138 /* In no case do we want to add the placeholder attribute. */
5139 *no_add_attrs
= true;
5141 /* The 64-bit ABI is unchanged for transactional memory. */
5145 /* ??? Is there a better way to validate 32-bit windows? We have
5146 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5147 if (CHECK_STACK_LIMIT
> 0)
5148 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5151 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5152 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5154 decl_attributes (node
, alt
, flags
);
5159 /* This function determines from TYPE the calling-convention. */
5162 ix86_get_callcvt (const_tree type
)
5164 unsigned int ret
= 0;
5169 return IX86_CALLCVT_CDECL
;
5171 attrs
= TYPE_ATTRIBUTES (type
);
5172 if (attrs
!= NULL_TREE
)
5174 if (lookup_attribute ("cdecl", attrs
))
5175 ret
|= IX86_CALLCVT_CDECL
;
5176 else if (lookup_attribute ("stdcall", attrs
))
5177 ret
|= IX86_CALLCVT_STDCALL
;
5178 else if (lookup_attribute ("fastcall", attrs
))
5179 ret
|= IX86_CALLCVT_FASTCALL
;
5180 else if (lookup_attribute ("thiscall", attrs
))
5181 ret
|= IX86_CALLCVT_THISCALL
;
5183 /* Regparam isn't allowed for thiscall and fastcall. */
5184 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5186 if (lookup_attribute ("regparm", attrs
))
5187 ret
|= IX86_CALLCVT_REGPARM
;
5188 if (lookup_attribute ("sseregparm", attrs
))
5189 ret
|= IX86_CALLCVT_SSEREGPARM
;
5192 if (IX86_BASE_CALLCVT(ret
) != 0)
5196 is_stdarg
= stdarg_p (type
);
5197 if (TARGET_RTD
&& !is_stdarg
)
5198 return IX86_CALLCVT_STDCALL
| ret
;
5202 || TREE_CODE (type
) != METHOD_TYPE
5203 || ix86_function_type_abi (type
) != MS_ABI
)
5204 return IX86_CALLCVT_CDECL
| ret
;
5206 return IX86_CALLCVT_THISCALL
;
5209 /* Return 0 if the attributes for two types are incompatible, 1 if they
5210 are compatible, and 2 if they are nearly compatible (which causes a
5211 warning to be generated). */
5214 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5216 unsigned int ccvt1
, ccvt2
;
5218 if (TREE_CODE (type1
) != FUNCTION_TYPE
5219 && TREE_CODE (type1
) != METHOD_TYPE
)
5222 ccvt1
= ix86_get_callcvt (type1
);
5223 ccvt2
= ix86_get_callcvt (type2
);
5226 if (ix86_function_regparm (type1
, NULL
)
5227 != ix86_function_regparm (type2
, NULL
))
5233 /* Return the regparm value for a function with the indicated TYPE and DECL.
5234 DECL may be NULL when calling function indirectly
5235 or considering a libcall. */
5238 ix86_function_regparm (const_tree type
, const_tree decl
)
5245 return (ix86_function_type_abi (type
) == SYSV_ABI
5246 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5247 ccvt
= ix86_get_callcvt (type
);
5248 regparm
= ix86_regparm
;
5250 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5252 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5255 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5259 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5261 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5264 /* Use register calling convention for local functions when possible. */
5266 && TREE_CODE (decl
) == FUNCTION_DECL
5268 && !(profile_flag
&& !flag_fentry
))
5270 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5271 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5272 if (i
&& i
->local
&& i
->can_change_signature
)
5274 int local_regparm
, globals
= 0, regno
;
5276 /* Make sure no regparm register is taken by a
5277 fixed register variable. */
5278 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5279 if (fixed_regs
[local_regparm
])
5282 /* We don't want to use regparm(3) for nested functions as
5283 these use a static chain pointer in the third argument. */
5284 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5287 /* In 32-bit mode save a register for the split stack. */
5288 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5291 /* Each fixed register usage increases register pressure,
5292 so less registers should be used for argument passing.
5293 This functionality can be overriden by an explicit
5295 for (regno
= 0; regno
<= DI_REG
; regno
++)
5296 if (fixed_regs
[regno
])
5300 = globals
< local_regparm
? local_regparm
- globals
: 0;
5302 if (local_regparm
> regparm
)
5303 regparm
= local_regparm
;
5310 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5311 DFmode (2) arguments in SSE registers for a function with the
5312 indicated TYPE and DECL. DECL may be NULL when calling function
5313 indirectly or considering a libcall. Otherwise return 0. */
5316 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5318 gcc_assert (!TARGET_64BIT
);
5320 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5321 by the sseregparm attribute. */
5322 if (TARGET_SSEREGPARM
5323 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5330 error ("calling %qD with attribute sseregparm without "
5331 "SSE/SSE2 enabled", decl
);
5333 error ("calling %qT with attribute sseregparm without "
5334 "SSE/SSE2 enabled", type
);
5342 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5343 (and DFmode for SSE2) arguments in SSE registers. */
5344 if (decl
&& TARGET_SSE_MATH
&& optimize
5345 && !(profile_flag
&& !flag_fentry
))
5347 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5348 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5349 if (i
&& i
->local
&& i
->can_change_signature
)
5350 return TARGET_SSE2
? 2 : 1;
5356 /* Return true if EAX is live at the start of the function. Used by
5357 ix86_expand_prologue to determine if we need special help before
5358 calling allocate_stack_worker. */
5361 ix86_eax_live_at_start_p (void)
5363 /* Cheat. Don't bother working forward from ix86_function_regparm
5364 to the function type to whether an actual argument is located in
5365 eax. Instead just look at cfg info, which is still close enough
5366 to correct at this point. This gives false positives for broken
5367 functions that might use uninitialized data that happens to be
5368 allocated in eax, but who cares? */
5369 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5373 ix86_keep_aggregate_return_pointer (tree fntype
)
5379 attr
= lookup_attribute ("callee_pop_aggregate_return",
5380 TYPE_ATTRIBUTES (fntype
));
5382 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5384 /* For 32-bit MS-ABI the default is to keep aggregate
5386 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5389 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5392 /* Value is the number of bytes of arguments automatically
5393 popped when returning from a subroutine call.
5394 FUNDECL is the declaration node of the function (as a tree),
5395 FUNTYPE is the data type of the function (as a tree),
5396 or for a library call it is an identifier node for the subroutine name.
5397 SIZE is the number of bytes of arguments passed on the stack.
5399 On the 80386, the RTD insn may be used to pop them if the number
5400 of args is fixed, but if the number is variable then the caller
5401 must pop them all. RTD can't be used for library calls now
5402 because the library is compiled with the Unix compiler.
5403 Use of RTD is a selectable option, since it is incompatible with
5404 standard Unix calling sequences. If the option is not selected,
5405 the caller must always pop the args.
5407 The attribute stdcall is equivalent to RTD on a per module basis. */
5410 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5414 /* None of the 64-bit ABIs pop arguments. */
5418 ccvt
= ix86_get_callcvt (funtype
);
5420 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5421 | IX86_CALLCVT_THISCALL
)) != 0
5422 && ! stdarg_p (funtype
))
5425 /* Lose any fake structure return argument if it is passed on the stack. */
5426 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5427 && !ix86_keep_aggregate_return_pointer (funtype
))
5429 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5431 return GET_MODE_SIZE (Pmode
);
5437 /* Argument support functions. */
5439 /* Return true when register may be used to pass function parameters. */
5441 ix86_function_arg_regno_p (int regno
)
5444 const int *parm_regs
;
5449 return (regno
< REGPARM_MAX
5450 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5452 return (regno
< REGPARM_MAX
5453 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5454 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5455 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5456 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5461 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5466 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5467 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5471 /* TODO: The function should depend on current function ABI but
5472 builtins.c would need updating then. Therefore we use the
5475 /* RAX is used as hidden argument to va_arg functions. */
5476 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5479 if (ix86_abi
== MS_ABI
)
5480 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5482 parm_regs
= x86_64_int_parameter_registers
;
5483 for (i
= 0; i
< (ix86_abi
== MS_ABI
5484 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5485 if (regno
== parm_regs
[i
])
5490 /* Return if we do not know how to pass TYPE solely in registers. */
5493 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5495 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5498 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5499 The layout_type routine is crafty and tries to trick us into passing
5500 currently unsupported vector types on the stack by using TImode. */
5501 return (!TARGET_64BIT
&& mode
== TImode
5502 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5505 /* It returns the size, in bytes, of the area reserved for arguments passed
5506 in registers for the function represented by fndecl dependent to the used
5509 ix86_reg_parm_stack_space (const_tree fndecl
)
5511 enum calling_abi call_abi
= SYSV_ABI
;
5512 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5513 call_abi
= ix86_function_abi (fndecl
);
5515 call_abi
= ix86_function_type_abi (fndecl
);
5516 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5521 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5524 ix86_function_type_abi (const_tree fntype
)
5526 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5528 enum calling_abi abi
= ix86_abi
;
5529 if (abi
== SYSV_ABI
)
5531 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5534 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5542 ix86_function_ms_hook_prologue (const_tree fn
)
5544 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5546 if (decl_function_context (fn
) != NULL_TREE
)
5547 error_at (DECL_SOURCE_LOCATION (fn
),
5548 "ms_hook_prologue is not compatible with nested function");
5555 static enum calling_abi
5556 ix86_function_abi (const_tree fndecl
)
5560 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5563 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5566 ix86_cfun_abi (void)
5570 return cfun
->machine
->call_abi
;
5573 /* Write the extra assembler code needed to declare a function properly. */
5576 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5579 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5583 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5584 unsigned int filler_cc
= 0xcccccccc;
5586 for (i
= 0; i
< filler_count
; i
+= 4)
5587 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5590 #ifdef SUBTARGET_ASM_UNWIND_INIT
5591 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5594 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5596 /* Output magic byte marker, if hot-patch attribute is set. */
5601 /* leaq [%rsp + 0], %rsp */
5602 asm_fprintf (asm_out_file
, ASM_BYTE
5603 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5607 /* movl.s %edi, %edi
5609 movl.s %esp, %ebp */
5610 asm_fprintf (asm_out_file
, ASM_BYTE
5611 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5617 extern void init_regs (void);
5619 /* Implementation of call abi switching target hook. Specific to FNDECL
5620 the specific call register sets are set. See also
5621 ix86_conditional_register_usage for more details. */
5623 ix86_call_abi_override (const_tree fndecl
)
5625 if (fndecl
== NULL_TREE
)
5626 cfun
->machine
->call_abi
= ix86_abi
;
5628 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5631 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5632 expensive re-initialization of init_regs each time we switch function context
5633 since this is needed only during RTL expansion. */
5635 ix86_maybe_switch_abi (void)
5638 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5642 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5643 for a call to a function whose data type is FNTYPE.
5644 For a library call, FNTYPE is 0. */
5647 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5648 tree fntype
, /* tree ptr for function decl */
5649 rtx libname
, /* SYMBOL_REF of library name or 0 */
5653 struct cgraph_local_info
*i
;
5656 memset (cum
, 0, sizeof (*cum
));
5658 /* Initialize for the current callee. */
5661 cfun
->machine
->callee_pass_avx256_p
= false;
5662 cfun
->machine
->callee_return_avx256_p
= false;
5667 i
= cgraph_local_info (fndecl
);
5668 cum
->call_abi
= ix86_function_abi (fndecl
);
5669 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5674 cum
->call_abi
= ix86_function_type_abi (fntype
);
5676 fnret_type
= TREE_TYPE (fntype
);
5681 if (TARGET_VZEROUPPER
&& fnret_type
)
5683 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5685 if (function_pass_avx256_p (fnret_value
))
5687 /* The return value of this function uses 256bit AVX modes. */
5689 cfun
->machine
->callee_return_avx256_p
= true;
5691 cfun
->machine
->caller_return_avx256_p
= true;
5695 cum
->caller
= caller
;
5697 /* Set up the number of registers to use for passing arguments. */
5699 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5700 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5701 "or subtarget optimization implying it");
5702 cum
->nregs
= ix86_regparm
;
5705 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5706 ? X86_64_REGPARM_MAX
5707 : X86_64_MS_REGPARM_MAX
);
5711 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5714 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5715 ? X86_64_SSE_REGPARM_MAX
5716 : X86_64_MS_SSE_REGPARM_MAX
);
5720 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5721 cum
->warn_avx
= true;
5722 cum
->warn_sse
= true;
5723 cum
->warn_mmx
= true;
5725 /* Because type might mismatch in between caller and callee, we need to
5726 use actual type of function for local calls.
5727 FIXME: cgraph_analyze can be told to actually record if function uses
5728 va_start so for local functions maybe_vaarg can be made aggressive
5730 FIXME: once typesytem is fixed, we won't need this code anymore. */
5731 if (i
&& i
->local
&& i
->can_change_signature
)
5732 fntype
= TREE_TYPE (fndecl
);
5733 cum
->maybe_vaarg
= (fntype
5734 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5739 /* If there are variable arguments, then we won't pass anything
5740 in registers in 32-bit mode. */
5741 if (stdarg_p (fntype
))
5752 /* Use ecx and edx registers if function has fastcall attribute,
5753 else look for regparm information. */
5756 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5757 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5760 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5762 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5768 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5771 /* Set up the number of SSE registers used for passing SFmode
5772 and DFmode arguments. Warn for mismatching ABI. */
5773 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5777 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5778 But in the case of vector types, it is some vector mode.
5780 When we have only some of our vector isa extensions enabled, then there
5781 are some modes for which vector_mode_supported_p is false. For these
5782 modes, the generic vector support in gcc will choose some non-vector mode
5783 in order to implement the type. By computing the natural mode, we'll
5784 select the proper ABI location for the operand and not depend on whatever
5785 the middle-end decides to do with these vector types.
5787 The midde-end can't deal with the vector types > 16 bytes. In this
5788 case, we return the original mode and warn ABI change if CUM isn't
5791 static enum machine_mode
5792 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5794 enum machine_mode mode
= TYPE_MODE (type
);
5796 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5798 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5799 if ((size
== 8 || size
== 16 || size
== 32)
5800 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5801 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5803 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5805 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5806 mode
= MIN_MODE_VECTOR_FLOAT
;
5808 mode
= MIN_MODE_VECTOR_INT
;
5810 /* Get the mode which has this inner mode and number of units. */
5811 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5812 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5813 && GET_MODE_INNER (mode
) == innermode
)
5815 if (size
== 32 && !TARGET_AVX
)
5817 static bool warnedavx
;
5824 warning (0, "AVX vector argument without AVX "
5825 "enabled changes the ABI");
5827 return TYPE_MODE (type
);
5829 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5831 static bool warnedsse
;
5838 warning (0, "SSE vector argument without SSE "
5839 "enabled changes the ABI");
5854 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5855 this may not agree with the mode that the type system has chosen for the
5856 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5857 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5860 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5865 if (orig_mode
!= BLKmode
)
5866 tmp
= gen_rtx_REG (orig_mode
, regno
);
5869 tmp
= gen_rtx_REG (mode
, regno
);
5870 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5871 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5877 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5878 of this code is to classify each 8bytes of incoming argument by the register
5879 class and assign registers accordingly. */
5881 /* Return the union class of CLASS1 and CLASS2.
5882 See the x86-64 PS ABI for details. */
5884 static enum x86_64_reg_class
5885 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5887 /* Rule #1: If both classes are equal, this is the resulting class. */
5888 if (class1
== class2
)
5891 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5893 if (class1
== X86_64_NO_CLASS
)
5895 if (class2
== X86_64_NO_CLASS
)
5898 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5899 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5900 return X86_64_MEMORY_CLASS
;
5902 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5903 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5904 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5905 return X86_64_INTEGERSI_CLASS
;
5906 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5907 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5908 return X86_64_INTEGER_CLASS
;
5910 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5912 if (class1
== X86_64_X87_CLASS
5913 || class1
== X86_64_X87UP_CLASS
5914 || class1
== X86_64_COMPLEX_X87_CLASS
5915 || class2
== X86_64_X87_CLASS
5916 || class2
== X86_64_X87UP_CLASS
5917 || class2
== X86_64_COMPLEX_X87_CLASS
)
5918 return X86_64_MEMORY_CLASS
;
5920 /* Rule #6: Otherwise class SSE is used. */
5921 return X86_64_SSE_CLASS
;
5924 /* Classify the argument of type TYPE and mode MODE.
5925 CLASSES will be filled by the register class used to pass each word
5926 of the operand. The number of words is returned. In case the parameter
5927 should be passed in memory, 0 is returned. As a special case for zero
5928 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5930 BIT_OFFSET is used internally for handling records and specifies offset
5931 of the offset in bits modulo 256 to avoid overflow cases.
5933 See the x86-64 PS ABI for details.
5937 classify_argument (enum machine_mode mode
, const_tree type
,
5938 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5940 HOST_WIDE_INT bytes
=
5941 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5943 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5945 /* Variable sized entities are always passed/returned in memory. */
5949 if (mode
!= VOIDmode
5950 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5953 if (type
&& AGGREGATE_TYPE_P (type
))
5957 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5959 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5963 for (i
= 0; i
< words
; i
++)
5964 classes
[i
] = X86_64_NO_CLASS
;
5966 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5967 signalize memory class, so handle it as special case. */
5970 classes
[0] = X86_64_NO_CLASS
;
5974 /* Classify each field of record and merge classes. */
5975 switch (TREE_CODE (type
))
5978 /* And now merge the fields of structure. */
5979 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5981 if (TREE_CODE (field
) == FIELD_DECL
)
5985 if (TREE_TYPE (field
) == error_mark_node
)
5988 /* Bitfields are always classified as integer. Handle them
5989 early, since later code would consider them to be
5990 misaligned integers. */
5991 if (DECL_BIT_FIELD (field
))
5993 for (i
= (int_bit_position (field
)
5994 + (bit_offset
% 64)) / 8 / 8;
5995 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5996 + tree_low_cst (DECL_SIZE (field
), 0)
5999 merge_classes (X86_64_INTEGER_CLASS
,
6006 type
= TREE_TYPE (field
);
6008 /* Flexible array member is ignored. */
6009 if (TYPE_MODE (type
) == BLKmode
6010 && TREE_CODE (type
) == ARRAY_TYPE
6011 && TYPE_SIZE (type
) == NULL_TREE
6012 && TYPE_DOMAIN (type
) != NULL_TREE
6013 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6018 if (!warned
&& warn_psabi
)
6021 inform (input_location
,
6022 "the ABI of passing struct with"
6023 " a flexible array member has"
6024 " changed in GCC 4.4");
6028 num
= classify_argument (TYPE_MODE (type
), type
,
6030 (int_bit_position (field
)
6031 + bit_offset
) % 256);
6034 pos
= (int_bit_position (field
)
6035 + (bit_offset
% 64)) / 8 / 8;
6036 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6038 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6045 /* Arrays are handled as small records. */
6048 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6049 TREE_TYPE (type
), subclasses
, bit_offset
);
6053 /* The partial classes are now full classes. */
6054 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6055 subclasses
[0] = X86_64_SSE_CLASS
;
6056 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6057 && !((bit_offset
% 64) == 0 && bytes
== 4))
6058 subclasses
[0] = X86_64_INTEGER_CLASS
;
6060 for (i
= 0; i
< words
; i
++)
6061 classes
[i
] = subclasses
[i
% num
];
6066 case QUAL_UNION_TYPE
:
6067 /* Unions are similar to RECORD_TYPE but offset is always 0.
6069 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6071 if (TREE_CODE (field
) == FIELD_DECL
)
6075 if (TREE_TYPE (field
) == error_mark_node
)
6078 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6079 TREE_TYPE (field
), subclasses
,
6083 for (i
= 0; i
< num
; i
++)
6084 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6095 /* When size > 16 bytes, if the first one isn't
6096 X86_64_SSE_CLASS or any other ones aren't
6097 X86_64_SSEUP_CLASS, everything should be passed in
6099 if (classes
[0] != X86_64_SSE_CLASS
)
6102 for (i
= 1; i
< words
; i
++)
6103 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6107 /* Final merger cleanup. */
6108 for (i
= 0; i
< words
; i
++)
6110 /* If one class is MEMORY, everything should be passed in
6112 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6115 /* The X86_64_SSEUP_CLASS should be always preceded by
6116 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6117 if (classes
[i
] == X86_64_SSEUP_CLASS
6118 && classes
[i
- 1] != X86_64_SSE_CLASS
6119 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6121 /* The first one should never be X86_64_SSEUP_CLASS. */
6122 gcc_assert (i
!= 0);
6123 classes
[i
] = X86_64_SSE_CLASS
;
6126 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6127 everything should be passed in memory. */
6128 if (classes
[i
] == X86_64_X87UP_CLASS
6129 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6133 /* The first one should never be X86_64_X87UP_CLASS. */
6134 gcc_assert (i
!= 0);
6135 if (!warned
&& warn_psabi
)
6138 inform (input_location
,
6139 "the ABI of passing union with long double"
6140 " has changed in GCC 4.4");
6148 /* Compute alignment needed. We align all types to natural boundaries with
6149 exception of XFmode that is aligned to 64bits. */
6150 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6152 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6155 mode_alignment
= 128;
6156 else if (mode
== XCmode
)
6157 mode_alignment
= 256;
6158 if (COMPLEX_MODE_P (mode
))
6159 mode_alignment
/= 2;
6160 /* Misaligned fields are always returned in memory. */
6161 if (bit_offset
% mode_alignment
)
6165 /* for V1xx modes, just use the base mode */
6166 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6167 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6168 mode
= GET_MODE_INNER (mode
);
6170 /* Classification of atomic types. */
6175 classes
[0] = X86_64_SSE_CLASS
;
6178 classes
[0] = X86_64_SSE_CLASS
;
6179 classes
[1] = X86_64_SSEUP_CLASS
;
6189 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6193 classes
[0] = X86_64_INTEGERSI_CLASS
;
6196 else if (size
<= 64)
6198 classes
[0] = X86_64_INTEGER_CLASS
;
6201 else if (size
<= 64+32)
6203 classes
[0] = X86_64_INTEGER_CLASS
;
6204 classes
[1] = X86_64_INTEGERSI_CLASS
;
6207 else if (size
<= 64+64)
6209 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6217 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6221 /* OImode shouldn't be used directly. */
6226 if (!(bit_offset
% 64))
6227 classes
[0] = X86_64_SSESF_CLASS
;
6229 classes
[0] = X86_64_SSE_CLASS
;
6232 classes
[0] = X86_64_SSEDF_CLASS
;
6235 classes
[0] = X86_64_X87_CLASS
;
6236 classes
[1] = X86_64_X87UP_CLASS
;
6239 classes
[0] = X86_64_SSE_CLASS
;
6240 classes
[1] = X86_64_SSEUP_CLASS
;
6243 classes
[0] = X86_64_SSE_CLASS
;
6244 if (!(bit_offset
% 64))
6250 if (!warned
&& warn_psabi
)
6253 inform (input_location
,
6254 "the ABI of passing structure with complex float"
6255 " member has changed in GCC 4.4");
6257 classes
[1] = X86_64_SSESF_CLASS
;
6261 classes
[0] = X86_64_SSEDF_CLASS
;
6262 classes
[1] = X86_64_SSEDF_CLASS
;
6265 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6268 /* This modes is larger than 16 bytes. */
6276 classes
[0] = X86_64_SSE_CLASS
;
6277 classes
[1] = X86_64_SSEUP_CLASS
;
6278 classes
[2] = X86_64_SSEUP_CLASS
;
6279 classes
[3] = X86_64_SSEUP_CLASS
;
6287 classes
[0] = X86_64_SSE_CLASS
;
6288 classes
[1] = X86_64_SSEUP_CLASS
;
6296 classes
[0] = X86_64_SSE_CLASS
;
6302 gcc_assert (VECTOR_MODE_P (mode
));
6307 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6309 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6310 classes
[0] = X86_64_INTEGERSI_CLASS
;
6312 classes
[0] = X86_64_INTEGER_CLASS
;
6313 classes
[1] = X86_64_INTEGER_CLASS
;
6314 return 1 + (bytes
> 8);
6318 /* Examine the argument and return set number of register required in each
6319 class. Return 0 iff parameter should be passed in memory. */
6321 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6322 int *int_nregs
, int *sse_nregs
)
6324 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6325 int n
= classify_argument (mode
, type
, regclass
, 0);
6331 for (n
--; n
>= 0; n
--)
6332 switch (regclass
[n
])
6334 case X86_64_INTEGER_CLASS
:
6335 case X86_64_INTEGERSI_CLASS
:
6338 case X86_64_SSE_CLASS
:
6339 case X86_64_SSESF_CLASS
:
6340 case X86_64_SSEDF_CLASS
:
6343 case X86_64_NO_CLASS
:
6344 case X86_64_SSEUP_CLASS
:
6346 case X86_64_X87_CLASS
:
6347 case X86_64_X87UP_CLASS
:
6351 case X86_64_COMPLEX_X87_CLASS
:
6352 return in_return
? 2 : 0;
6353 case X86_64_MEMORY_CLASS
:
6359 /* Construct container for the argument used by GCC interface. See
6360 FUNCTION_ARG for the detailed description. */
6363 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6364 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6365 const int *intreg
, int sse_regno
)
6367 /* The following variables hold the static issued_error state. */
6368 static bool issued_sse_arg_error
;
6369 static bool issued_sse_ret_error
;
6370 static bool issued_x87_ret_error
;
6372 enum machine_mode tmpmode
;
6374 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6375 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6379 int needed_sseregs
, needed_intregs
;
6380 rtx exp
[MAX_CLASSES
];
6383 n
= classify_argument (mode
, type
, regclass
, 0);
6386 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6389 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6392 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6393 some less clueful developer tries to use floating-point anyway. */
6394 if (needed_sseregs
&& !TARGET_SSE
)
6398 if (!issued_sse_ret_error
)
6400 error ("SSE register return with SSE disabled");
6401 issued_sse_ret_error
= true;
6404 else if (!issued_sse_arg_error
)
6406 error ("SSE register argument with SSE disabled");
6407 issued_sse_arg_error
= true;
6412 /* Likewise, error if the ABI requires us to return values in the
6413 x87 registers and the user specified -mno-80387. */
6414 if (!TARGET_80387
&& in_return
)
6415 for (i
= 0; i
< n
; i
++)
6416 if (regclass
[i
] == X86_64_X87_CLASS
6417 || regclass
[i
] == X86_64_X87UP_CLASS
6418 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6420 if (!issued_x87_ret_error
)
6422 error ("x87 register return with x87 disabled");
6423 issued_x87_ret_error
= true;
6428 /* First construct simple cases. Avoid SCmode, since we want to use
6429 single register to pass this type. */
6430 if (n
== 1 && mode
!= SCmode
)
6431 switch (regclass
[0])
6433 case X86_64_INTEGER_CLASS
:
6434 case X86_64_INTEGERSI_CLASS
:
6435 return gen_rtx_REG (mode
, intreg
[0]);
6436 case X86_64_SSE_CLASS
:
6437 case X86_64_SSESF_CLASS
:
6438 case X86_64_SSEDF_CLASS
:
6439 if (mode
!= BLKmode
)
6440 return gen_reg_or_parallel (mode
, orig_mode
,
6441 SSE_REGNO (sse_regno
));
6443 case X86_64_X87_CLASS
:
6444 case X86_64_COMPLEX_X87_CLASS
:
6445 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6446 case X86_64_NO_CLASS
:
6447 /* Zero sized array, struct or class. */
6453 && regclass
[0] == X86_64_SSE_CLASS
6454 && regclass
[1] == X86_64_SSEUP_CLASS
6456 return gen_reg_or_parallel (mode
, orig_mode
,
6457 SSE_REGNO (sse_regno
));
6459 && regclass
[0] == X86_64_SSE_CLASS
6460 && regclass
[1] == X86_64_SSEUP_CLASS
6461 && regclass
[2] == X86_64_SSEUP_CLASS
6462 && regclass
[3] == X86_64_SSEUP_CLASS
6464 return gen_reg_or_parallel (mode
, orig_mode
,
6465 SSE_REGNO (sse_regno
));
6467 && regclass
[0] == X86_64_X87_CLASS
6468 && regclass
[1] == X86_64_X87UP_CLASS
)
6469 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6472 && regclass
[0] == X86_64_INTEGER_CLASS
6473 && regclass
[1] == X86_64_INTEGER_CLASS
6474 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6475 && intreg
[0] + 1 == intreg
[1])
6476 return gen_rtx_REG (mode
, intreg
[0]);
6478 /* Otherwise figure out the entries of the PARALLEL. */
6479 for (i
= 0; i
< n
; i
++)
6483 switch (regclass
[i
])
6485 case X86_64_NO_CLASS
:
6487 case X86_64_INTEGER_CLASS
:
6488 case X86_64_INTEGERSI_CLASS
:
6489 /* Merge TImodes on aligned occasions here too. */
6490 if (i
* 8 + 8 > bytes
)
6492 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6493 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6497 /* We've requested 24 bytes we
6498 don't have mode for. Use DImode. */
6499 if (tmpmode
== BLKmode
)
6502 = gen_rtx_EXPR_LIST (VOIDmode
,
6503 gen_rtx_REG (tmpmode
, *intreg
),
6507 case X86_64_SSESF_CLASS
:
6509 = gen_rtx_EXPR_LIST (VOIDmode
,
6510 gen_rtx_REG (SFmode
,
6511 SSE_REGNO (sse_regno
)),
6515 case X86_64_SSEDF_CLASS
:
6517 = gen_rtx_EXPR_LIST (VOIDmode
,
6518 gen_rtx_REG (DFmode
,
6519 SSE_REGNO (sse_regno
)),
6523 case X86_64_SSE_CLASS
:
6531 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6541 && regclass
[1] == X86_64_SSEUP_CLASS
6542 && regclass
[2] == X86_64_SSEUP_CLASS
6543 && regclass
[3] == X86_64_SSEUP_CLASS
);
6551 = gen_rtx_EXPR_LIST (VOIDmode
,
6552 gen_rtx_REG (tmpmode
,
6553 SSE_REGNO (sse_regno
)),
6562 /* Empty aligned struct, union or class. */
6566 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6567 for (i
= 0; i
< nexps
; i
++)
6568 XVECEXP (ret
, 0, i
) = exp
[i
];
6572 /* Update the data in CUM to advance over an argument of mode MODE
6573 and data type TYPE. (TYPE is null for libcalls where that information
6574 may not be available.) */
6577 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6578 const_tree type
, HOST_WIDE_INT bytes
,
6579 HOST_WIDE_INT words
)
6595 cum
->words
+= words
;
6596 cum
->nregs
-= words
;
6597 cum
->regno
+= words
;
6599 if (cum
->nregs
<= 0)
6607 /* OImode shouldn't be used directly. */
6611 if (cum
->float_in_sse
< 2)
6614 if (cum
->float_in_sse
< 1)
6631 if (!type
|| !AGGREGATE_TYPE_P (type
))
6633 cum
->sse_words
+= words
;
6634 cum
->sse_nregs
-= 1;
6635 cum
->sse_regno
+= 1;
6636 if (cum
->sse_nregs
<= 0)
6650 if (!type
|| !AGGREGATE_TYPE_P (type
))
6652 cum
->mmx_words
+= words
;
6653 cum
->mmx_nregs
-= 1;
6654 cum
->mmx_regno
+= 1;
6655 if (cum
->mmx_nregs
<= 0)
6666 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6667 const_tree type
, HOST_WIDE_INT words
, bool named
)
6669 int int_nregs
, sse_nregs
;
6671 /* Unnamed 256bit vector mode parameters are passed on stack. */
6672 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6675 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6676 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6678 cum
->nregs
-= int_nregs
;
6679 cum
->sse_nregs
-= sse_nregs
;
6680 cum
->regno
+= int_nregs
;
6681 cum
->sse_regno
+= sse_nregs
;
6685 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6686 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6687 cum
->words
+= words
;
6692 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6693 HOST_WIDE_INT words
)
6695 /* Otherwise, this should be passed indirect. */
6696 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6698 cum
->words
+= words
;
6706 /* Update the data in CUM to advance over an argument of mode MODE and
6707 data type TYPE. (TYPE is null for libcalls where that information
6708 may not be available.) */
6711 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6712 const_tree type
, bool named
)
6714 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6715 HOST_WIDE_INT bytes
, words
;
6717 if (mode
== BLKmode
)
6718 bytes
= int_size_in_bytes (type
);
6720 bytes
= GET_MODE_SIZE (mode
);
6721 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6724 mode
= type_natural_mode (type
, NULL
);
6726 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6727 function_arg_advance_ms_64 (cum
, bytes
, words
);
6728 else if (TARGET_64BIT
)
6729 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6731 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6734 /* Define where to put the arguments to a function.
6735 Value is zero to push the argument on the stack,
6736 or a hard register in which to store the argument.
6738 MODE is the argument's machine mode.
6739 TYPE is the data type of the argument (as a tree).
6740 This is null for libcalls where that information may
6742 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6743 the preceding args and about the function being called.
6744 NAMED is nonzero if this argument is a named parameter
6745 (otherwise it is an extra parameter matching an ellipsis). */
6748 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6749 enum machine_mode orig_mode
, const_tree type
,
6750 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6752 static bool warnedsse
, warnedmmx
;
6754 /* Avoid the AL settings for the Unix64 ABI. */
6755 if (mode
== VOIDmode
)
6771 if (words
<= cum
->nregs
)
6773 int regno
= cum
->regno
;
6775 /* Fastcall allocates the first two DWORD (SImode) or
6776 smaller arguments to ECX and EDX if it isn't an
6782 || (type
&& AGGREGATE_TYPE_P (type
)))
6785 /* ECX not EAX is the first allocated register. */
6786 if (regno
== AX_REG
)
6789 return gen_rtx_REG (mode
, regno
);
6794 if (cum
->float_in_sse
< 2)
6797 if (cum
->float_in_sse
< 1)
6801 /* In 32bit, we pass TImode in xmm registers. */
6808 if (!type
|| !AGGREGATE_TYPE_P (type
))
6810 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6813 warning (0, "SSE vector argument without SSE enabled "
6817 return gen_reg_or_parallel (mode
, orig_mode
,
6818 cum
->sse_regno
+ FIRST_SSE_REG
);
6823 /* OImode shouldn't be used directly. */
6832 if (!type
|| !AGGREGATE_TYPE_P (type
))
6835 return gen_reg_or_parallel (mode
, orig_mode
,
6836 cum
->sse_regno
+ FIRST_SSE_REG
);
6846 if (!type
|| !AGGREGATE_TYPE_P (type
))
6848 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6851 warning (0, "MMX vector argument without MMX enabled "
6855 return gen_reg_or_parallel (mode
, orig_mode
,
6856 cum
->mmx_regno
+ FIRST_MMX_REG
);
6865 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6866 enum machine_mode orig_mode
, const_tree type
, bool named
)
6868 /* Handle a hidden AL argument containing number of registers
6869 for varargs x86-64 functions. */
6870 if (mode
== VOIDmode
)
6871 return GEN_INT (cum
->maybe_vaarg
6872 ? (cum
->sse_nregs
< 0
6873 ? X86_64_SSE_REGPARM_MAX
6888 /* Unnamed 256bit vector mode parameters are passed on stack. */
6894 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6896 &x86_64_int_parameter_registers
[cum
->regno
],
6901 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6902 enum machine_mode orig_mode
, bool named
,
6903 HOST_WIDE_INT bytes
)
6907 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6908 We use value of -2 to specify that current function call is MSABI. */
6909 if (mode
== VOIDmode
)
6910 return GEN_INT (-2);
6912 /* If we've run out of registers, it goes on the stack. */
6913 if (cum
->nregs
== 0)
6916 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6918 /* Only floating point modes are passed in anything but integer regs. */
6919 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6922 regno
= cum
->regno
+ FIRST_SSE_REG
;
6927 /* Unnamed floating parameters are passed in both the
6928 SSE and integer registers. */
6929 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6930 t2
= gen_rtx_REG (mode
, regno
);
6931 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6932 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6933 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6936 /* Handle aggregated types passed in register. */
6937 if (orig_mode
== BLKmode
)
6939 if (bytes
> 0 && bytes
<= 8)
6940 mode
= (bytes
> 4 ? DImode
: SImode
);
6941 if (mode
== BLKmode
)
6945 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6948 /* Return where to put the arguments to a function.
6949 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6951 MODE is the argument's machine mode. TYPE is the data type of the
6952 argument. It is null for libcalls where that information may not be
6953 available. CUM gives information about the preceding args and about
6954 the function being called. NAMED is nonzero if this argument is a
6955 named parameter (otherwise it is an extra parameter matching an
6959 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6960 const_tree type
, bool named
)
6962 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6963 enum machine_mode mode
= omode
;
6964 HOST_WIDE_INT bytes
, words
;
6967 if (mode
== BLKmode
)
6968 bytes
= int_size_in_bytes (type
);
6970 bytes
= GET_MODE_SIZE (mode
);
6971 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6973 /* To simplify the code below, represent vector types with a vector mode
6974 even if MMX/SSE are not active. */
6975 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6976 mode
= type_natural_mode (type
, cum
);
6978 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6979 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6980 else if (TARGET_64BIT
)
6981 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6983 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6985 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
6987 /* This argument uses 256bit AVX modes. */
6989 cfun
->machine
->callee_pass_avx256_p
= true;
6991 cfun
->machine
->caller_pass_avx256_p
= true;
6997 /* A C expression that indicates when an argument must be passed by
6998 reference. If nonzero for an argument, a copy of that argument is
6999 made in memory and a pointer to the argument is passed instead of
7000 the argument itself. The pointer is passed in whatever way is
7001 appropriate for passing a pointer to that type. */
7004 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
7005 enum machine_mode mode ATTRIBUTE_UNUSED
,
7006 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7008 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7010 /* See Windows x64 Software Convention. */
7011 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7013 int msize
= (int) GET_MODE_SIZE (mode
);
7016 /* Arrays are passed by reference. */
7017 if (TREE_CODE (type
) == ARRAY_TYPE
)
7020 if (AGGREGATE_TYPE_P (type
))
7022 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7023 are passed by reference. */
7024 msize
= int_size_in_bytes (type
);
7028 /* __m128 is passed by reference. */
7030 case 1: case 2: case 4: case 8:
7036 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7042 /* Return true when TYPE should be 128bit aligned for 32bit argument
7043 passing ABI. XXX: This function is obsolete and is only used for
7044 checking psABI compatibility with previous versions of GCC. */
7047 ix86_compat_aligned_value_p (const_tree type
)
7049 enum machine_mode mode
= TYPE_MODE (type
);
7050 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7054 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7056 if (TYPE_ALIGN (type
) < 128)
7059 if (AGGREGATE_TYPE_P (type
))
7061 /* Walk the aggregates recursively. */
7062 switch (TREE_CODE (type
))
7066 case QUAL_UNION_TYPE
:
7070 /* Walk all the structure fields. */
7071 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7073 if (TREE_CODE (field
) == FIELD_DECL
7074 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7081 /* Just for use if some languages passes arrays by value. */
7082 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7093 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7094 XXX: This function is obsolete and is only used for checking psABI
7095 compatibility with previous versions of GCC. */
7098 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7099 const_tree type
, unsigned int align
)
7101 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7102 natural boundaries. */
7103 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7105 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7106 make an exception for SSE modes since these require 128bit
7109 The handling here differs from field_alignment. ICC aligns MMX
7110 arguments to 4 byte boundaries, while structure fields are aligned
7111 to 8 byte boundaries. */
7114 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7115 align
= PARM_BOUNDARY
;
7119 if (!ix86_compat_aligned_value_p (type
))
7120 align
= PARM_BOUNDARY
;
7123 if (align
> BIGGEST_ALIGNMENT
)
7124 align
= BIGGEST_ALIGNMENT
;
7128 /* Return true when TYPE should be 128bit aligned for 32bit argument
7132 ix86_contains_aligned_value_p (const_tree type
)
7134 enum machine_mode mode
= TYPE_MODE (type
);
7136 if (mode
== XFmode
|| mode
== XCmode
)
7139 if (TYPE_ALIGN (type
) < 128)
7142 if (AGGREGATE_TYPE_P (type
))
7144 /* Walk the aggregates recursively. */
7145 switch (TREE_CODE (type
))
7149 case QUAL_UNION_TYPE
:
7153 /* Walk all the structure fields. */
7154 for (field
= TYPE_FIELDS (type
);
7156 field
= DECL_CHAIN (field
))
7158 if (TREE_CODE (field
) == FIELD_DECL
7159 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7166 /* Just for use if some languages passes arrays by value. */
7167 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7176 return TYPE_ALIGN (type
) >= 128;
7181 /* Gives the alignment boundary, in bits, of an argument with the
7182 specified mode and type. */
7185 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7190 /* Since the main variant type is used for call, we convert it to
7191 the main variant type. */
7192 type
= TYPE_MAIN_VARIANT (type
);
7193 align
= TYPE_ALIGN (type
);
7196 align
= GET_MODE_ALIGNMENT (mode
);
7197 if (align
< PARM_BOUNDARY
)
7198 align
= PARM_BOUNDARY
;
7202 unsigned int saved_align
= align
;
7206 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7209 if (mode
== XFmode
|| mode
== XCmode
)
7210 align
= PARM_BOUNDARY
;
7212 else if (!ix86_contains_aligned_value_p (type
))
7213 align
= PARM_BOUNDARY
;
7216 align
= PARM_BOUNDARY
;
7221 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7225 inform (input_location
,
7226 "The ABI for passing parameters with %d-byte"
7227 " alignment has changed in GCC 4.6",
7228 align
/ BITS_PER_UNIT
);
7235 /* Return true if N is a possible register number of function value. */
7238 ix86_function_value_regno_p (const unsigned int regno
)
7245 case FIRST_FLOAT_REG
:
7246 /* TODO: The function should depend on current function ABI but
7247 builtins.c would need updating then. Therefore we use the
7249 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7251 return TARGET_FLOAT_RETURNS_IN_80387
;
7257 if (TARGET_MACHO
|| TARGET_64BIT
)
7265 /* Define how to find the value returned by a function.
7266 VALTYPE is the data type of the value (as a tree).
7267 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7268 otherwise, FUNC is 0. */
7271 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7272 const_tree fntype
, const_tree fn
)
7276 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7277 we normally prevent this case when mmx is not available. However
7278 some ABIs may require the result to be returned like DImode. */
7279 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7280 regno
= FIRST_MMX_REG
;
7282 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7283 we prevent this case when sse is not available. However some ABIs
7284 may require the result to be returned like integer TImode. */
7285 else if (mode
== TImode
7286 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7287 regno
= FIRST_SSE_REG
;
7289 /* 32-byte vector modes in %ymm0. */
7290 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7291 regno
= FIRST_SSE_REG
;
7293 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7294 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7295 regno
= FIRST_FLOAT_REG
;
7297 /* Most things go in %eax. */
7300 /* Override FP return register with %xmm0 for local functions when
7301 SSE math is enabled or for functions with sseregparm attribute. */
7302 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7304 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7305 if ((sse_level
>= 1 && mode
== SFmode
)
7306 || (sse_level
== 2 && mode
== DFmode
))
7307 regno
= FIRST_SSE_REG
;
7310 /* OImode shouldn't be used directly. */
7311 gcc_assert (mode
!= OImode
);
7313 return gen_rtx_REG (orig_mode
, regno
);
7317 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7322 /* Handle libcalls, which don't provide a type node. */
7323 if (valtype
== NULL
)
7337 regno
= FIRST_SSE_REG
;
7341 regno
= FIRST_FLOAT_REG
;
7349 return gen_rtx_REG (mode
, regno
);
7351 else if (POINTER_TYPE_P (valtype
))
7353 /* Pointers are always returned in word_mode. */
7357 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7358 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7359 x86_64_int_return_registers
, 0);
7361 /* For zero sized structures, construct_container returns NULL, but we
7362 need to keep rest of compiler happy by returning meaningful value. */
7364 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7370 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7372 unsigned int regno
= AX_REG
;
7376 switch (GET_MODE_SIZE (mode
))
7379 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7380 && !COMPLEX_MODE_P (mode
))
7381 regno
= FIRST_SSE_REG
;
7385 if (mode
== SFmode
|| mode
== DFmode
)
7386 regno
= FIRST_SSE_REG
;
7392 return gen_rtx_REG (orig_mode
, regno
);
7396 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7397 enum machine_mode orig_mode
, enum machine_mode mode
)
7399 const_tree fn
, fntype
;
7402 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7403 fn
= fntype_or_decl
;
7404 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7406 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7407 return function_value_ms_64 (orig_mode
, mode
);
7408 else if (TARGET_64BIT
)
7409 return function_value_64 (orig_mode
, mode
, valtype
);
7411 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7415 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7416 bool outgoing ATTRIBUTE_UNUSED
)
7418 enum machine_mode mode
, orig_mode
;
7420 orig_mode
= TYPE_MODE (valtype
);
7421 mode
= type_natural_mode (valtype
, NULL
);
7422 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7425 /* Pointer function arguments and return values are promoted to
7428 static enum machine_mode
7429 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7430 int *punsignedp
, const_tree fntype
,
7433 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7435 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7438 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7443 ix86_libcall_value (enum machine_mode mode
)
7445 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7448 /* Return true iff type is returned in memory. */
7450 static bool ATTRIBUTE_UNUSED
7451 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7455 if (mode
== BLKmode
)
7458 size
= int_size_in_bytes (type
);
7460 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7463 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7465 /* User-created vectors small enough to fit in EAX. */
7469 /* MMX/3dNow values are returned in MM0,
7470 except when it doesn't exits or the ABI prescribes otherwise. */
7472 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7474 /* SSE values are returned in XMM0, except when it doesn't exist. */
7478 /* AVX values are returned in YMM0, except when it doesn't exist. */
7489 /* OImode shouldn't be used directly. */
7490 gcc_assert (mode
!= OImode
);
7495 static bool ATTRIBUTE_UNUSED
7496 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7498 int needed_intregs
, needed_sseregs
;
7499 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7502 static bool ATTRIBUTE_UNUSED
7503 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7505 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7507 /* __m128 is returned in xmm0. */
7508 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7509 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7512 /* Otherwise, the size must be exactly in [1248]. */
7513 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7517 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7519 #ifdef SUBTARGET_RETURN_IN_MEMORY
7520 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7522 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7526 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7527 return return_in_memory_ms_64 (type
, mode
);
7529 return return_in_memory_64 (type
, mode
);
7532 return return_in_memory_32 (type
, mode
);
7536 /* When returning SSE vector types, we have a choice of either
7537 (1) being abi incompatible with a -march switch, or
7538 (2) generating an error.
7539 Given no good solution, I think the safest thing is one warning.
7540 The user won't be able to use -Werror, but....
7542 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7543 called in response to actually generating a caller or callee that
7544 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7545 via aggregate_value_p for general type probing from tree-ssa. */
7548 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7550 static bool warnedsse
, warnedmmx
;
7552 if (!TARGET_64BIT
&& type
)
7554 /* Look at the return type of the function, not the function type. */
7555 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7557 if (!TARGET_SSE
&& !warnedsse
)
7560 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7563 warning (0, "SSE vector return without SSE enabled "
7568 if (!TARGET_MMX
&& !warnedmmx
)
7570 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7573 warning (0, "MMX vector return without MMX enabled "
7583 /* Create the va_list data type. */
7585 /* Returns the calling convention specific va_list date type.
7586 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7589 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7591 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7593 /* For i386 we use plain pointer to argument area. */
7594 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7595 return build_pointer_type (char_type_node
);
7597 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7598 type_decl
= build_decl (BUILTINS_LOCATION
,
7599 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7601 f_gpr
= build_decl (BUILTINS_LOCATION
,
7602 FIELD_DECL
, get_identifier ("gp_offset"),
7603 unsigned_type_node
);
7604 f_fpr
= build_decl (BUILTINS_LOCATION
,
7605 FIELD_DECL
, get_identifier ("fp_offset"),
7606 unsigned_type_node
);
7607 f_ovf
= build_decl (BUILTINS_LOCATION
,
7608 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7610 f_sav
= build_decl (BUILTINS_LOCATION
,
7611 FIELD_DECL
, get_identifier ("reg_save_area"),
7614 va_list_gpr_counter_field
= f_gpr
;
7615 va_list_fpr_counter_field
= f_fpr
;
7617 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7618 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7619 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7620 DECL_FIELD_CONTEXT (f_sav
) = record
;
7622 TYPE_STUB_DECL (record
) = type_decl
;
7623 TYPE_NAME (record
) = type_decl
;
7624 TYPE_FIELDS (record
) = f_gpr
;
7625 DECL_CHAIN (f_gpr
) = f_fpr
;
7626 DECL_CHAIN (f_fpr
) = f_ovf
;
7627 DECL_CHAIN (f_ovf
) = f_sav
;
7629 layout_type (record
);
7631 /* The correct type is an array type of one element. */
7632 return build_array_type (record
, build_index_type (size_zero_node
));
7635 /* Setup the builtin va_list data type and for 64-bit the additional
7636 calling convention specific va_list data types. */
7639 ix86_build_builtin_va_list (void)
7641 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7643 /* Initialize abi specific va_list builtin types. */
7647 if (ix86_abi
== MS_ABI
)
7649 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7650 if (TREE_CODE (t
) != RECORD_TYPE
)
7651 t
= build_variant_type_copy (t
);
7652 sysv_va_list_type_node
= t
;
7657 if (TREE_CODE (t
) != RECORD_TYPE
)
7658 t
= build_variant_type_copy (t
);
7659 sysv_va_list_type_node
= t
;
7661 if (ix86_abi
!= MS_ABI
)
7663 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7664 if (TREE_CODE (t
) != RECORD_TYPE
)
7665 t
= build_variant_type_copy (t
);
7666 ms_va_list_type_node
= t
;
7671 if (TREE_CODE (t
) != RECORD_TYPE
)
7672 t
= build_variant_type_copy (t
);
7673 ms_va_list_type_node
= t
;
7680 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7683 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7689 /* GPR size of varargs save area. */
7690 if (cfun
->va_list_gpr_size
)
7691 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7693 ix86_varargs_gpr_size
= 0;
7695 /* FPR size of varargs save area. We don't need it if we don't pass
7696 anything in SSE registers. */
7697 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7698 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7700 ix86_varargs_fpr_size
= 0;
7702 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7705 save_area
= frame_pointer_rtx
;
7706 set
= get_varargs_alias_set ();
7708 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7709 if (max
> X86_64_REGPARM_MAX
)
7710 max
= X86_64_REGPARM_MAX
;
7712 for (i
= cum
->regno
; i
< max
; i
++)
7714 mem
= gen_rtx_MEM (word_mode
,
7715 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7716 MEM_NOTRAP_P (mem
) = 1;
7717 set_mem_alias_set (mem
, set
);
7718 emit_move_insn (mem
,
7719 gen_rtx_REG (word_mode
,
7720 x86_64_int_parameter_registers
[i
]));
7723 if (ix86_varargs_fpr_size
)
7725 enum machine_mode smode
;
7728 /* Now emit code to save SSE registers. The AX parameter contains number
7729 of SSE parameter registers used to call this function, though all we
7730 actually check here is the zero/non-zero status. */
7732 label
= gen_label_rtx ();
7733 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7734 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7737 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7738 we used movdqa (i.e. TImode) instead? Perhaps even better would
7739 be if we could determine the real mode of the data, via a hook
7740 into pass_stdarg. Ignore all that for now. */
7742 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7743 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7745 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7746 if (max
> X86_64_SSE_REGPARM_MAX
)
7747 max
= X86_64_SSE_REGPARM_MAX
;
7749 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7751 mem
= plus_constant (Pmode
, save_area
,
7752 i
* 16 + ix86_varargs_gpr_size
);
7753 mem
= gen_rtx_MEM (smode
, mem
);
7754 MEM_NOTRAP_P (mem
) = 1;
7755 set_mem_alias_set (mem
, set
);
7756 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7758 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7766 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7768 alias_set_type set
= get_varargs_alias_set ();
7771 /* Reset to zero, as there might be a sysv vaarg used
7773 ix86_varargs_gpr_size
= 0;
7774 ix86_varargs_fpr_size
= 0;
7776 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7780 mem
= gen_rtx_MEM (Pmode
,
7781 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7782 i
* UNITS_PER_WORD
));
7783 MEM_NOTRAP_P (mem
) = 1;
7784 set_mem_alias_set (mem
, set
);
7786 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7787 emit_move_insn (mem
, reg
);
7792 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7793 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7796 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7797 CUMULATIVE_ARGS next_cum
;
7800 /* This argument doesn't appear to be used anymore. Which is good,
7801 because the old code here didn't suppress rtl generation. */
7802 gcc_assert (!no_rtl
);
7807 fntype
= TREE_TYPE (current_function_decl
);
7809 /* For varargs, we do not want to skip the dummy va_dcl argument.
7810 For stdargs, we do want to skip the last named argument. */
7812 if (stdarg_p (fntype
))
7813 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7816 if (cum
->call_abi
== MS_ABI
)
7817 setup_incoming_varargs_ms_64 (&next_cum
);
7819 setup_incoming_varargs_64 (&next_cum
);
7822 /* Checks if TYPE is of kind va_list char *. */
7825 is_va_list_char_pointer (tree type
)
7829 /* For 32-bit it is always true. */
7832 canonic
= ix86_canonical_va_list_type (type
);
7833 return (canonic
== ms_va_list_type_node
7834 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7837 /* Implement va_start. */
7840 ix86_va_start (tree valist
, rtx nextarg
)
7842 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7843 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7844 tree gpr
, fpr
, ovf
, sav
, t
;
7848 if (flag_split_stack
7849 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7851 unsigned int scratch_regno
;
7853 /* When we are splitting the stack, we can't refer to the stack
7854 arguments using internal_arg_pointer, because they may be on
7855 the old stack. The split stack prologue will arrange to
7856 leave a pointer to the old stack arguments in a scratch
7857 register, which we here copy to a pseudo-register. The split
7858 stack prologue can't set the pseudo-register directly because
7859 it (the prologue) runs before any registers have been saved. */
7861 scratch_regno
= split_stack_prologue_scratch_regno ();
7862 if (scratch_regno
!= INVALID_REGNUM
)
7866 reg
= gen_reg_rtx (Pmode
);
7867 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7870 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7874 push_topmost_sequence ();
7875 emit_insn_after (seq
, entry_of_function ());
7876 pop_topmost_sequence ();
7880 /* Only 64bit target needs something special. */
7881 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7883 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7884 std_expand_builtin_va_start (valist
, nextarg
);
7889 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7890 next
= expand_binop (ptr_mode
, add_optab
,
7891 cfun
->machine
->split_stack_varargs_pointer
,
7892 crtl
->args
.arg_offset_rtx
,
7893 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7894 convert_move (va_r
, next
, 0);
7899 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7900 f_fpr
= DECL_CHAIN (f_gpr
);
7901 f_ovf
= DECL_CHAIN (f_fpr
);
7902 f_sav
= DECL_CHAIN (f_ovf
);
7904 valist
= build_simple_mem_ref (valist
);
7905 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7906 /* The following should be folded into the MEM_REF offset. */
7907 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7909 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7911 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7913 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7916 /* Count number of gp and fp argument registers used. */
7917 words
= crtl
->args
.info
.words
;
7918 n_gpr
= crtl
->args
.info
.regno
;
7919 n_fpr
= crtl
->args
.info
.sse_regno
;
7921 if (cfun
->va_list_gpr_size
)
7923 type
= TREE_TYPE (gpr
);
7924 t
= build2 (MODIFY_EXPR
, type
,
7925 gpr
, build_int_cst (type
, n_gpr
* 8));
7926 TREE_SIDE_EFFECTS (t
) = 1;
7927 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7930 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7932 type
= TREE_TYPE (fpr
);
7933 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7934 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7935 TREE_SIDE_EFFECTS (t
) = 1;
7936 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7939 /* Find the overflow area. */
7940 type
= TREE_TYPE (ovf
);
7941 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7942 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7944 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7945 t
= make_tree (type
, ovf_rtx
);
7947 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7948 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7949 TREE_SIDE_EFFECTS (t
) = 1;
7950 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7952 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7954 /* Find the register save area.
7955 Prologue of the function save it right above stack frame. */
7956 type
= TREE_TYPE (sav
);
7957 t
= make_tree (type
, frame_pointer_rtx
);
7958 if (!ix86_varargs_gpr_size
)
7959 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7960 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7961 TREE_SIDE_EFFECTS (t
) = 1;
7962 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7966 /* Implement va_arg. */
7969 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7972 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7973 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7974 tree gpr
, fpr
, ovf
, sav
, t
;
7976 tree lab_false
, lab_over
= NULL_TREE
;
7981 enum machine_mode nat_mode
;
7982 unsigned int arg_boundary
;
7984 /* Only 64bit target needs something special. */
7985 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7986 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7988 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7989 f_fpr
= DECL_CHAIN (f_gpr
);
7990 f_ovf
= DECL_CHAIN (f_fpr
);
7991 f_sav
= DECL_CHAIN (f_ovf
);
7993 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7994 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7995 valist
= build_va_arg_indirect_ref (valist
);
7996 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7997 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7998 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8000 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8002 type
= build_pointer_type (type
);
8003 size
= int_size_in_bytes (type
);
8004 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8006 nat_mode
= type_natural_mode (type
, NULL
);
8015 /* Unnamed 256bit vector mode parameters are passed on stack. */
8016 if (!TARGET_64BIT_MS_ABI
)
8023 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8024 type
, 0, X86_64_REGPARM_MAX
,
8025 X86_64_SSE_REGPARM_MAX
, intreg
,
8030 /* Pull the value out of the saved registers. */
8032 addr
= create_tmp_var (ptr_type_node
, "addr");
8036 int needed_intregs
, needed_sseregs
;
8038 tree int_addr
, sse_addr
;
8040 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8041 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8043 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8045 need_temp
= (!REG_P (container
)
8046 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8047 || TYPE_ALIGN (type
) > 128));
8049 /* In case we are passing structure, verify that it is consecutive block
8050 on the register save area. If not we need to do moves. */
8051 if (!need_temp
&& !REG_P (container
))
8053 /* Verify that all registers are strictly consecutive */
8054 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8058 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8060 rtx slot
= XVECEXP (container
, 0, i
);
8061 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8062 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8070 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8072 rtx slot
= XVECEXP (container
, 0, i
);
8073 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8074 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8086 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8087 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8090 /* First ensure that we fit completely in registers. */
8093 t
= build_int_cst (TREE_TYPE (gpr
),
8094 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8095 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8096 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8097 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8098 gimplify_and_add (t
, pre_p
);
8102 t
= build_int_cst (TREE_TYPE (fpr
),
8103 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8104 + X86_64_REGPARM_MAX
* 8);
8105 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8106 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8107 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8108 gimplify_and_add (t
, pre_p
);
8111 /* Compute index to start of area used for integer regs. */
8114 /* int_addr = gpr + sav; */
8115 t
= fold_build_pointer_plus (sav
, gpr
);
8116 gimplify_assign (int_addr
, t
, pre_p
);
8120 /* sse_addr = fpr + sav; */
8121 t
= fold_build_pointer_plus (sav
, fpr
);
8122 gimplify_assign (sse_addr
, t
, pre_p
);
8126 int i
, prev_size
= 0;
8127 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8130 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8131 gimplify_assign (addr
, t
, pre_p
);
8133 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8135 rtx slot
= XVECEXP (container
, 0, i
);
8136 rtx reg
= XEXP (slot
, 0);
8137 enum machine_mode mode
= GET_MODE (reg
);
8143 tree dest_addr
, dest
;
8144 int cur_size
= GET_MODE_SIZE (mode
);
8146 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8147 prev_size
= INTVAL (XEXP (slot
, 1));
8148 if (prev_size
+ cur_size
> size
)
8150 cur_size
= size
- prev_size
;
8151 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8152 if (mode
== BLKmode
)
8155 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8156 if (mode
== GET_MODE (reg
))
8157 addr_type
= build_pointer_type (piece_type
);
8159 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8161 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8164 if (SSE_REGNO_P (REGNO (reg
)))
8166 src_addr
= sse_addr
;
8167 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8171 src_addr
= int_addr
;
8172 src_offset
= REGNO (reg
) * 8;
8174 src_addr
= fold_convert (addr_type
, src_addr
);
8175 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8177 dest_addr
= fold_convert (daddr_type
, addr
);
8178 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8179 if (cur_size
== GET_MODE_SIZE (mode
))
8181 src
= build_va_arg_indirect_ref (src_addr
);
8182 dest
= build_va_arg_indirect_ref (dest_addr
);
8184 gimplify_assign (dest
, src
, pre_p
);
8189 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8190 3, dest_addr
, src_addr
,
8191 size_int (cur_size
));
8192 gimplify_and_add (copy
, pre_p
);
8194 prev_size
+= cur_size
;
8200 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8201 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8202 gimplify_assign (gpr
, t
, pre_p
);
8207 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8208 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8209 gimplify_assign (fpr
, t
, pre_p
);
8212 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8214 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8217 /* ... otherwise out of the overflow area. */
8219 /* When we align parameter on stack for caller, if the parameter
8220 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8221 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8222 here with caller. */
8223 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8224 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8225 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8227 /* Care for on-stack alignment if needed. */
8228 if (arg_boundary
<= 64 || size
== 0)
8232 HOST_WIDE_INT align
= arg_boundary
/ 8;
8233 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8234 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8235 build_int_cst (TREE_TYPE (t
), -align
));
8238 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8239 gimplify_assign (addr
, t
, pre_p
);
8241 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8242 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8245 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8247 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8248 addr
= fold_convert (ptrtype
, addr
);
8251 addr
= build_va_arg_indirect_ref (addr
);
8252 return build_va_arg_indirect_ref (addr
);
8255 /* Return true if OPNUM's MEM should be matched
8256 in movabs* patterns. */
8259 ix86_check_movabs (rtx insn
, int opnum
)
8263 set
= PATTERN (insn
);
8264 if (GET_CODE (set
) == PARALLEL
)
8265 set
= XVECEXP (set
, 0, 0);
8266 gcc_assert (GET_CODE (set
) == SET
);
8267 mem
= XEXP (set
, opnum
);
8268 while (GET_CODE (mem
) == SUBREG
)
8269 mem
= SUBREG_REG (mem
);
8270 gcc_assert (MEM_P (mem
));
8271 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8274 /* Initialize the table of extra 80387 mathematical constants. */
8277 init_ext_80387_constants (void)
8279 static const char * cst
[5] =
8281 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8282 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8283 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8284 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8285 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8289 for (i
= 0; i
< 5; i
++)
8291 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8292 /* Ensure each constant is rounded to XFmode precision. */
8293 real_convert (&ext_80387_constants_table
[i
],
8294 XFmode
, &ext_80387_constants_table
[i
]);
8297 ext_80387_constants_init
= 1;
8300 /* Return non-zero if the constant is something that
8301 can be loaded with a special instruction. */
8304 standard_80387_constant_p (rtx x
)
8306 enum machine_mode mode
= GET_MODE (x
);
8310 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8313 if (x
== CONST0_RTX (mode
))
8315 if (x
== CONST1_RTX (mode
))
8318 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8320 /* For XFmode constants, try to find a special 80387 instruction when
8321 optimizing for size or on those CPUs that benefit from them. */
8323 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8327 if (! ext_80387_constants_init
)
8328 init_ext_80387_constants ();
8330 for (i
= 0; i
< 5; i
++)
8331 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8335 /* Load of the constant -0.0 or -1.0 will be split as
8336 fldz;fchs or fld1;fchs sequence. */
8337 if (real_isnegzero (&r
))
8339 if (real_identical (&r
, &dconstm1
))
8345 /* Return the opcode of the special instruction to be used to load
8349 standard_80387_constant_opcode (rtx x
)
8351 switch (standard_80387_constant_p (x
))
8375 /* Return the CONST_DOUBLE representing the 80387 constant that is
8376 loaded by the specified special instruction. The argument IDX
8377 matches the return value from standard_80387_constant_p. */
8380 standard_80387_constant_rtx (int idx
)
8384 if (! ext_80387_constants_init
)
8385 init_ext_80387_constants ();
8401 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8405 /* Return 1 if X is all 0s and 2 if x is all 1s
8406 in supported SSE/AVX vector mode. */
8409 standard_sse_constant_p (rtx x
)
8411 enum machine_mode mode
= GET_MODE (x
);
8413 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8415 if (vector_all_ones_operand (x
, mode
))
8437 /* Return the opcode of the special instruction to be used to load
8441 standard_sse_constant_opcode (rtx insn
, rtx x
)
8443 switch (standard_sse_constant_p (x
))
8446 switch (get_attr_mode (insn
))
8449 return "%vpxor\t%0, %d0";
8451 return "%vxorpd\t%0, %d0";
8453 return "%vxorps\t%0, %d0";
8456 return "vpxor\t%x0, %x0, %x0";
8458 return "vxorpd\t%x0, %x0, %x0";
8460 return "vxorps\t%x0, %x0, %x0";
8468 return "vpcmpeqd\t%0, %0, %0";
8470 return "pcmpeqd\t%0, %0";
8478 /* Returns true if OP contains a symbol reference */
8481 symbolic_reference_mentioned_p (rtx op
)
8486 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8489 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8490 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8496 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8497 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8501 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8508 /* Return true if it is appropriate to emit `ret' instructions in the
8509 body of a function. Do this only if the epilogue is simple, needing a
8510 couple of insns. Prior to reloading, we can't tell how many registers
8511 must be saved, so return false then. Return false if there is no frame
8512 marker to de-allocate. */
8515 ix86_can_use_return_insn_p (void)
8517 struct ix86_frame frame
;
8519 if (! reload_completed
|| frame_pointer_needed
)
8522 /* Don't allow more than 32k pop, since that's all we can do
8523 with one instruction. */
8524 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8527 ix86_compute_frame_layout (&frame
);
8528 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8529 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8532 /* Value should be nonzero if functions must have frame pointers.
8533 Zero means the frame pointer need not be set up (and parms may
8534 be accessed via the stack pointer) in functions that seem suitable. */
8537 ix86_frame_pointer_required (void)
8539 /* If we accessed previous frames, then the generated code expects
8540 to be able to access the saved ebp value in our frame. */
8541 if (cfun
->machine
->accesses_prev_frame
)
8544 /* Several x86 os'es need a frame pointer for other reasons,
8545 usually pertaining to setjmp. */
8546 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8549 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8550 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8553 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8554 turns off the frame pointer by default. Turn it back on now if
8555 we've not got a leaf function. */
8556 if (TARGET_OMIT_LEAF_FRAME_POINTER
8557 && (!current_function_is_leaf
8558 || ix86_current_function_calls_tls_descriptor
))
8561 if (crtl
->profile
&& !flag_fentry
)
8567 /* Record that the current function accesses previous call frames. */
8570 ix86_setup_frame_addresses (void)
8572 cfun
->machine
->accesses_prev_frame
= 1;
8575 #ifndef USE_HIDDEN_LINKONCE
8576 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8577 # define USE_HIDDEN_LINKONCE 1
8579 # define USE_HIDDEN_LINKONCE 0
8583 static int pic_labels_used
;
8585 /* Fills in the label name that should be used for a pc thunk for
8586 the given register. */
8589 get_pc_thunk_name (char name
[32], unsigned int regno
)
8591 gcc_assert (!TARGET_64BIT
);
8593 if (USE_HIDDEN_LINKONCE
)
8594 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8596 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8600 /* This function generates code for -fpic that loads %ebx with
8601 the return address of the caller and then returns. */
8604 ix86_code_end (void)
8609 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8614 if (!(pic_labels_used
& (1 << regno
)))
8617 get_pc_thunk_name (name
, regno
);
8619 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8620 get_identifier (name
),
8621 build_function_type_list (void_type_node
, NULL_TREE
));
8622 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8623 NULL_TREE
, void_type_node
);
8624 TREE_PUBLIC (decl
) = 1;
8625 TREE_STATIC (decl
) = 1;
8626 DECL_IGNORED_P (decl
) = 1;
8631 switch_to_section (darwin_sections
[text_coal_section
]);
8632 fputs ("\t.weak_definition\t", asm_out_file
);
8633 assemble_name (asm_out_file
, name
);
8634 fputs ("\n\t.private_extern\t", asm_out_file
);
8635 assemble_name (asm_out_file
, name
);
8636 putc ('\n', asm_out_file
);
8637 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8638 DECL_WEAK (decl
) = 1;
8642 if (USE_HIDDEN_LINKONCE
)
8644 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8646 targetm
.asm_out
.unique_section (decl
, 0);
8647 switch_to_section (get_named_section (decl
, NULL
, 0));
8649 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8650 fputs ("\t.hidden\t", asm_out_file
);
8651 assemble_name (asm_out_file
, name
);
8652 putc ('\n', asm_out_file
);
8653 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8657 switch_to_section (text_section
);
8658 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8661 DECL_INITIAL (decl
) = make_node (BLOCK
);
8662 current_function_decl
= decl
;
8663 init_function_start (decl
);
8664 first_function_block_is_cold
= false;
8665 /* Make sure unwind info is emitted for the thunk if needed. */
8666 final_start_function (emit_barrier (), asm_out_file
, 1);
8668 /* Pad stack IP move with 4 instructions (two NOPs count
8669 as one instruction). */
8670 if (TARGET_PAD_SHORT_FUNCTION
)
8675 fputs ("\tnop\n", asm_out_file
);
8678 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8679 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8680 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8681 fputs ("\tret\n", asm_out_file
);
8682 final_end_function ();
8683 init_insn_lengths ();
8684 free_after_compilation (cfun
);
8686 current_function_decl
= NULL
;
8689 if (flag_split_stack
)
8690 file_end_indicate_split_stack ();
8693 /* Emit code for the SET_GOT patterns. */
8696 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8702 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8704 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8705 xops
[2] = gen_rtx_MEM (Pmode
,
8706 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8707 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8709 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8710 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8711 an unadorned address. */
8712 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8713 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8714 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8718 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8722 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8724 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8727 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8728 is what will be referenced by the Mach-O PIC subsystem. */
8730 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8733 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8734 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8739 get_pc_thunk_name (name
, REGNO (dest
));
8740 pic_labels_used
|= 1 << REGNO (dest
);
8742 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8743 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8744 output_asm_insn ("call\t%X2", xops
);
8745 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8746 is what will be referenced by the Mach-O PIC subsystem. */
8749 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8751 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8752 CODE_LABEL_NUMBER (label
));
8757 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8762 /* Generate an "push" pattern for input ARG. */
8767 struct machine_function
*m
= cfun
->machine
;
8769 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8770 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8771 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8773 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8774 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8776 return gen_rtx_SET (VOIDmode
,
8777 gen_rtx_MEM (word_mode
,
8778 gen_rtx_PRE_DEC (Pmode
,
8779 stack_pointer_rtx
)),
8783 /* Generate an "pop" pattern for input ARG. */
8788 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8789 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8791 return gen_rtx_SET (VOIDmode
,
8793 gen_rtx_MEM (word_mode
,
8794 gen_rtx_POST_INC (Pmode
,
8795 stack_pointer_rtx
)));
8798 /* Return >= 0 if there is an unused call-clobbered register available
8799 for the entire function. */
8802 ix86_select_alt_pic_regnum (void)
8804 if (current_function_is_leaf
8806 && !ix86_current_function_calls_tls_descriptor
)
8809 /* Can't use the same register for both PIC and DRAP. */
8811 drap
= REGNO (crtl
->drap_reg
);
8814 for (i
= 2; i
>= 0; --i
)
8815 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8819 return INVALID_REGNUM
;
8822 /* Return TRUE if we need to save REGNO. */
8825 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8827 if (pic_offset_table_rtx
8828 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8829 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8831 || crtl
->calls_eh_return
8832 || crtl
->uses_const_pool
))
8833 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8835 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8840 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8841 if (test
== INVALID_REGNUM
)
8848 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8851 return (df_regs_ever_live_p (regno
)
8852 && !call_used_regs
[regno
]
8853 && !fixed_regs
[regno
]
8854 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8857 /* Return number of saved general prupose registers. */
8860 ix86_nsaved_regs (void)
8865 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8866 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8871 /* Return number of saved SSE registrers. */
8874 ix86_nsaved_sseregs (void)
8879 if (!TARGET_64BIT_MS_ABI
)
8881 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8882 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8887 /* Given FROM and TO register numbers, say whether this elimination is
8888 allowed. If stack alignment is needed, we can only replace argument
8889 pointer with hard frame pointer, or replace frame pointer with stack
8890 pointer. Otherwise, frame pointer elimination is automatically
8891 handled and all other eliminations are valid. */
8894 ix86_can_eliminate (const int from
, const int to
)
8896 if (stack_realign_fp
)
8897 return ((from
== ARG_POINTER_REGNUM
8898 && to
== HARD_FRAME_POINTER_REGNUM
)
8899 || (from
== FRAME_POINTER_REGNUM
8900 && to
== STACK_POINTER_REGNUM
));
8902 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8905 /* Return the offset between two registers, one to be eliminated, and the other
8906 its replacement, at the start of a routine. */
8909 ix86_initial_elimination_offset (int from
, int to
)
8911 struct ix86_frame frame
;
8912 ix86_compute_frame_layout (&frame
);
8914 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8915 return frame
.hard_frame_pointer_offset
;
8916 else if (from
== FRAME_POINTER_REGNUM
8917 && to
== HARD_FRAME_POINTER_REGNUM
)
8918 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8921 gcc_assert (to
== STACK_POINTER_REGNUM
);
8923 if (from
== ARG_POINTER_REGNUM
)
8924 return frame
.stack_pointer_offset
;
8926 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8927 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8931 /* In a dynamically-aligned function, we can't know the offset from
8932 stack pointer to frame pointer, so we must ensure that setjmp
8933 eliminates fp against the hard fp (%ebp) rather than trying to
8934 index from %esp up to the top of the frame across a gap that is
8935 of unknown (at compile-time) size. */
8937 ix86_builtin_setjmp_frame_value (void)
8939 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8942 /* When using -fsplit-stack, the allocation routines set a field in
8943 the TCB to the bottom of the stack plus this much space, measured
8946 #define SPLIT_STACK_AVAILABLE 256
8948 /* Fill structure ix86_frame about frame of currently computed function. */
8951 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8953 unsigned HOST_WIDE_INT stack_alignment_needed
;
8954 HOST_WIDE_INT offset
;
8955 unsigned HOST_WIDE_INT preferred_alignment
;
8956 HOST_WIDE_INT size
= get_frame_size ();
8957 HOST_WIDE_INT to_allocate
;
8959 frame
->nregs
= ix86_nsaved_regs ();
8960 frame
->nsseregs
= ix86_nsaved_sseregs ();
8962 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8963 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8965 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8966 function prologues and leaf. */
8967 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8968 && (!current_function_is_leaf
|| cfun
->calls_alloca
!= 0
8969 || ix86_current_function_calls_tls_descriptor
))
8971 preferred_alignment
= 16;
8972 stack_alignment_needed
= 16;
8973 crtl
->preferred_stack_boundary
= 128;
8974 crtl
->stack_alignment_needed
= 128;
8977 gcc_assert (!size
|| stack_alignment_needed
);
8978 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8979 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8981 /* For SEH we have to limit the amount of code movement into the prologue.
8982 At present we do this via a BLOCKAGE, at which point there's very little
8983 scheduling that can be done, which means that there's very little point
8984 in doing anything except PUSHs. */
8986 cfun
->machine
->use_fast_prologue_epilogue
= false;
8988 /* During reload iteration the amount of registers saved can change.
8989 Recompute the value as needed. Do not recompute when amount of registers
8990 didn't change as reload does multiple calls to the function and does not
8991 expect the decision to change within single iteration. */
8992 else if (!optimize_function_for_size_p (cfun
)
8993 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
8995 int count
= frame
->nregs
;
8996 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
8998 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9000 /* The fast prologue uses move instead of push to save registers. This
9001 is significantly longer, but also executes faster as modern hardware
9002 can execute the moves in parallel, but can't do that for push/pop.
9004 Be careful about choosing what prologue to emit: When function takes
9005 many instructions to execute we may use slow version as well as in
9006 case function is known to be outside hot spot (this is known with
9007 feedback only). Weight the size of function by number of registers
9008 to save as it is cheap to use one or two push instructions but very
9009 slow to use many of them. */
9011 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9012 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9013 || (flag_branch_probabilities
9014 && node
->frequency
< NODE_FREQUENCY_HOT
))
9015 cfun
->machine
->use_fast_prologue_epilogue
= false;
9017 cfun
->machine
->use_fast_prologue_epilogue
9018 = !expensive_function_p (count
);
9021 frame
->save_regs_using_mov
9022 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9023 /* If static stack checking is enabled and done with probes,
9024 the registers need to be saved before allocating the frame. */
9025 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9027 /* Skip return address. */
9028 offset
= UNITS_PER_WORD
;
9030 /* Skip pushed static chain. */
9031 if (ix86_static_chain_on_stack
)
9032 offset
+= UNITS_PER_WORD
;
9034 /* Skip saved base pointer. */
9035 if (frame_pointer_needed
)
9036 offset
+= UNITS_PER_WORD
;
9037 frame
->hfp_save_offset
= offset
;
9039 /* The traditional frame pointer location is at the top of the frame. */
9040 frame
->hard_frame_pointer_offset
= offset
;
9042 /* Register save area */
9043 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9044 frame
->reg_save_offset
= offset
;
9046 /* Align and set SSE register save area. */
9047 if (frame
->nsseregs
)
9049 /* The only ABI that has saved SSE registers (Win64) also has a
9050 16-byte aligned default stack, and thus we don't need to be
9051 within the re-aligned local stack frame to save them. */
9052 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9053 offset
= (offset
+ 16 - 1) & -16;
9054 offset
+= frame
->nsseregs
* 16;
9056 frame
->sse_reg_save_offset
= offset
;
9058 /* The re-aligned stack starts here. Values before this point are not
9059 directly comparable with values below this point. In order to make
9060 sure that no value happens to be the same before and after, force
9061 the alignment computation below to add a non-zero value. */
9062 if (stack_realign_fp
)
9063 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9066 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9067 offset
+= frame
->va_arg_size
;
9069 /* Align start of frame for local function. */
9070 if (stack_realign_fp
9071 || offset
!= frame
->sse_reg_save_offset
9073 || !current_function_is_leaf
9074 || cfun
->calls_alloca
9075 || ix86_current_function_calls_tls_descriptor
)
9076 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9078 /* Frame pointer points here. */
9079 frame
->frame_pointer_offset
= offset
;
9083 /* Add outgoing arguments area. Can be skipped if we eliminated
9084 all the function calls as dead code.
9085 Skipping is however impossible when function calls alloca. Alloca
9086 expander assumes that last crtl->outgoing_args_size
9087 of stack frame are unused. */
9088 if (ACCUMULATE_OUTGOING_ARGS
9089 && (!current_function_is_leaf
|| cfun
->calls_alloca
9090 || ix86_current_function_calls_tls_descriptor
))
9092 offset
+= crtl
->outgoing_args_size
;
9093 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9096 frame
->outgoing_arguments_size
= 0;
9098 /* Align stack boundary. Only needed if we're calling another function
9100 if (!current_function_is_leaf
|| cfun
->calls_alloca
9101 || ix86_current_function_calls_tls_descriptor
)
9102 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9104 /* We've reached end of stack frame. */
9105 frame
->stack_pointer_offset
= offset
;
9107 /* Size prologue needs to allocate. */
9108 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9110 if ((!to_allocate
&& frame
->nregs
<= 1)
9111 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9112 frame
->save_regs_using_mov
= false;
9114 if (ix86_using_red_zone ()
9115 && current_function_sp_is_unchanging
9116 && current_function_is_leaf
9117 && !ix86_current_function_calls_tls_descriptor
)
9119 frame
->red_zone_size
= to_allocate
;
9120 if (frame
->save_regs_using_mov
)
9121 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9122 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9123 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9126 frame
->red_zone_size
= 0;
9127 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9129 /* The SEH frame pointer location is near the bottom of the frame.
9130 This is enforced by the fact that the difference between the
9131 stack pointer and the frame pointer is limited to 240 bytes in
9132 the unwind data structure. */
9137 /* If we can leave the frame pointer where it is, do so. */
9138 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9139 if (diff
> 240 || (diff
& 15) != 0)
9141 /* Ideally we'd determine what portion of the local stack frame
9142 (within the constraint of the lowest 240) is most heavily used.
9143 But without that complication, simply bias the frame pointer
9144 by 128 bytes so as to maximize the amount of the local stack
9145 frame that is addressable with 8-bit offsets. */
9146 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
/* NOTE(review): this chunk is a lossy extraction -- the return-type line,
   braces and several statements of this function are missing from view, so
   only comments are touched; all code bytes are left exactly as found.  */
/* choose_baseaddr_len: compute the encoding length (modrm/SIB/displacement
   bytes) of a REGNO + OFFSET address -- presumably returns an int; the
   declaration line is not visible here, so confirm against the full file.  */
9151 /* This is semi-inlined memory_address_length, but simplified
9152 since we know that we're always dealing with reg+offset, and
9153 to avoid having to create and discard all that rtl. */
9156 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
/* EBP (and R13 in 64-bit mode) always need an explicit displacement byte,
   even when the offset is zero.  */
9162 /* EBP and R13 cannot be encoded without an offset. */
9163 len
= (regno
 == BP_REG
 || regno
 == R13_REG
);
/* An offset in [-128, 127] fits in a one-byte (disp8) displacement.  */
9165 else if (IN_RANGE (offset
, -128, 127))
9168 /* ESP and R12 must be encoded with a SIB byte. */
9169 if (regno
== SP_REG
 || regno
 == R12_REG
)
/* NOTE(review): lossy extraction -- return type, braces and some lines
   (e.g. the fp_valid test before line 9194 and the tlen comparisons) are
   missing; only comments are added, code bytes untouched.  */
/* choose_baseaddr: build a (base_reg + base_offset) address for CFA_OFFSET
   within the stack frame, picking among FP / DRAP / SP according to which
   of m->fs.{fp,drap,sp}_valid holds.  Fast-prologue functions prefer the
   register giving the best scheduling; otherwise the shortest encoding
   (via choose_baseaddr_len) wins, ties broken FP > DRAP > SP.  */
9175 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9176 The valid base registers are taken from CFUN->MACHINE->FS. */
9179 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9181 const struct machine_function
*m
= cfun
->machine
;
9182 rtx base_reg
= NULL
;
9183 HOST_WIDE_INT base_offset
= 0;
9185 if (m
->use_fast_prologue_epilogue
)
9187 /* Choose the base register most likely to allow the most scheduling
9188 opportunities. Generally FP is valid throughout the function,
9189 while DRAP must be reloaded within the epilogue. But choose either
9190 over the SP due to increased encoding size. */
9194 base_reg
= hard_frame_pointer_rtx
;
9195 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9197 else if (m
->fs
.drap_valid
)
9199 base_reg
= crtl
->drap_reg
;
/* DRAP points at the CFA itself, hence offset 0 - cfa_offset.  */
9200 base_offset
= 0 - cfa_offset
;
9202 else if (m
->fs
.sp_valid
)
9204 base_reg
= stack_pointer_rtx
;
9205 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9210 HOST_WIDE_INT toffset
;
9213 /* Choose the base register with the smallest address encoding.
9214 With a tie, choose FP > DRAP > SP. */
/* SP is the default candidate; DRAP and FP below replace it when their
   encodings are no longer (comparison lines not visible in this view).  */
9217 base_reg
= stack_pointer_rtx
;
9218 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9219 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9221 if (m
->fs
.drap_valid
)
9223 toffset
= 0 - cfa_offset
;
9224 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9227 base_reg
= crtl
->drap_reg
;
9228 base_offset
= toffset
;
9234 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9235 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9238 base_reg
= hard_frame_pointer_rtx
;
9239 base_offset
= toffset
;
/* At least one of fp/drap/sp must have been valid.  */
9244 gcc_assert (base_reg
!= NULL
);
9246 return plus_constant (Pmode
, base_reg
, base_offset
);
/* NOTE(review): lossy extraction -- declaration line and braces missing;
   comments only, code bytes untouched.  */
/* ix86_emit_save_regs: emit PUSH insns for every call-saved integer
   register that ix86_save_reg says must be saved, walking registers from
   high regno down so pushes land in descending order; each push is marked
   frame-related for the unwinder.  */
9249 /* Emit code to save registers in the prologue. */
9252 ix86_emit_save_regs (void)
9257 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9258 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9260 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9261 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): lossy extraction -- braces, the `base = addr;` style
   initialization of BASE, and some lines are missing; comments only.  */
/* ix86_emit_save_reg_using_mov: store register REGNO (in MODE) to the
   frame slot at CFA - CFA_OFFSET via a MOV, then attach the dwarf2 note
   (REG_CFA_DEF_CFA / REG_CFA_EXPRESSION / REG_CFA_OFFSET) needed when the
   chosen base register differs from the current CFA register or the frame
   is re-aligned.  */
9265 /* Emit a single register save at CFA - CFA_OFFSET. */
9268 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9269 HOST_WIDE_INT cfa_offset
)
9271 struct machine_function
*m
= cfun
->machine
;
9272 rtx reg
= gen_rtx_REG (mode
, regno
);
9273 rtx mem
, addr
, base
, insn
;
9275 addr
= choose_baseaddr (cfa_offset
);
9276 mem
= gen_frame_mem (mode
, addr
);
9278 /* For SSE saves, we need to indicate the 128-bit alignment. */
9279 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9281 insn
= emit_move_insn (mem
, reg
);
9282 RTX_FRAME_RELATED_P (insn
) = 1;
/* Strip the (plus base offset) down to the bare base register.
   NOTE(review): the line assigning BASE from ADDR is not visible here.  */
9285 if (GET_CODE (base
) == PLUS
)
9286 base
= XEXP (base
, 0);
9287 gcc_checking_assert (REG_P (base
));
9289 /* When saving registers into a re-aligned local stack frame, avoid
9290 any tricky guessing by dwarf2out. */
9291 if (m
->fs
.realigned
)
9293 gcc_checking_assert (stack_realign_drap
);
9295 if (regno
== REGNO (crtl
->drap_reg
))
9297 /* A bit of a hack. We force the DRAP register to be saved in
9298 the re-aligned stack frame, which provides us with a copy
9299 of the CFA that will last past the prologue. Install it. */
9300 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9301 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9302 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9303 mem
= gen_rtx_MEM (mode
, addr
);
9304 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9308 /* The frame pointer is a stable reference within the
9309 aligned frame. Use it. */
9310 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9311 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9312 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9313 mem
= gen_rtx_MEM (mode
, addr
);
9314 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9315 gen_rtx_SET (VOIDmode
, mem
, reg
));
9319 /* The memory may not be relative to the current CFA register,
9320 which means that we may need to generate a new pattern for
9321 use by the unwind info. */
9322 else if (base
!= m
->fs
.cfa_reg
)
9324 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9325 m
->fs
.cfa_offset
- cfa_offset
);
9326 mem
= gen_rtx_MEM (mode
, addr
);
9327 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
/* NOTE(review): lossy extraction -- declaration line and braces missing;
   comments only, code bytes untouched.  */
/* ix86_emit_save_regs_using_mov: MOV-save each call-saved integer register
   to successive word slots, starting at CFA - CFA_OFFSET and walking the
   offset down by UNITS_PER_WORD per saved register.  */
9331 /* Emit code to save registers using MOV insns.
9332 First register is stored at CFA - CFA_OFFSET. */
9334 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9338 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9339 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9341 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9342 cfa_offset
-= UNITS_PER_WORD
;
/* NOTE(review): lossy extraction -- declaration, braces and the offset
   decrement line (-= 16, per the word_mode sibling above) are missing;
   comments only, code bytes untouched.  */
/* ix86_emit_save_sse_regs_using_mov: same as the integer variant but for
   SSE registers, saved in V4SFmode (16-byte slots).  */
9346 /* Emit code to save SSE registers using MOV insns.
9347 First register is stored at CFA - CFA_OFFSET. */
9349 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9353 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9354 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9356 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
/* Pending REG_CFA_RESTORE notes, flushed onto the next stack-manipulation
   insn by ix86_add_queued_cfa_restore_notes.  GTY(()) keeps the list
   alive across garbage collection.  */
9361 static GTY(()) rtx queued_cfa_restores
;
/* NOTE(review): lossy extraction -- braces and the if/else structure
   around lines 9378/9383 are partly missing; comments only.  */
9363 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9364 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9365 Don't add the note if the previously saved value will be left untouched
9366 within stack red-zone till return, as unwinders can find the same value
9367 in the register and on the stack. */
9370 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
/* Skip the note entirely when the slot stays intact inside the red zone
   (and the function is not shrink-wrapped).  */
9372 if (!crtl
->shrink_wrapped
9373 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9378 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9379 RTX_FRAME_RELATED_P (insn
) = 1;
/* No insn to hang the note on yet: push it onto the queue instead.  */
9383 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
/* NOTE(review): lossy extraction -- declaration, braces and the early
   `return` body after the !queued_cfa_restores test are missing;
   comments only, code bytes untouched.  */
/* ix86_add_queued_cfa_restore_notes: splice the whole queued
   REG_CFA_RESTORE list onto the front of INSN's notes, clear the queue,
   and mark INSN frame-related.  */
9386 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9389 ix86_add_queued_cfa_restore_notes (rtx insn
)
9392 if (!queued_cfa_restores
)
/* Find the tail of the queued list so it can be linked ahead of the
   insn's existing notes.  */
9394 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9396 XEXP (last
, 1) = REG_NOTES (insn
);
9397 REG_NOTES (insn
) = queued_cfa_restores
;
9398 queued_cfa_restores
= NULL_RTX
;
9399 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): lossy extraction -- braces, parts of the doc comment and
   several control-flow lines are missing; comments only, bytes untouched.  */
/* pro_epilogue_adjust_stack: emit DEST = SRC + OFFSET for prologue or
   epilogue stack adjustment, using the pro_epilogue_adjust_stack_{si,di}_add
   patterns so all ebp-based memory accesses depend on it.  When the 64-bit
   immediate does not fit, the constant is first loaded into r11 (or the
   frame pointer when style == 0 says r11 is live).  Afterwards the
   machine_function fs.* CFA/SP tracking state is updated.  */
9402 /* Expand prologue or epilogue stack adjustment.
9403 The pattern exist to put a dependency on all ebp-based memory accesses.
9404 STYLE should be negative if instructions should be marked as frame related,
9405 zero if %r11 register is live and cannot be freely used and positive
9409 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9410 int style
, bool set_cfa
)
9412 struct machine_function
*m
= cfun
->machine
;
9414 bool add_frame_related_expr
= false;
9416 if (Pmode
== SImode
)
9417 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9418 else if (x86_64_immediate_operand (offset
, DImode
))
9419 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
/* Offset too big for a 64-bit add immediate: stage it in a scratch reg.  */
9423 /* r11 is used by indirect sibcall return as well, set before the
9424 epilogue and used after the epilogue. */
9426 tmp
= gen_rtx_REG (DImode
, R11_REG
);
/* style == 0: r11 unavailable, borrow the (dead here) frame pointer.  */
9429 gcc_assert (src
!= hard_frame_pointer_rtx
9430 && dest
!= hard_frame_pointer_rtx
);
9431 tmp
= hard_frame_pointer_rtx
;
9433 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9435 add_frame_related_expr
= true;
9437 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9440 insn
= emit_insn (insn
);
/* Stack-manipulation insn: flush any queued CFA restore notes onto it.  */
9442 ix86_add_queued_cfa_restore_notes (insn
);
/* When SET_CFA, the CFA register/offset move with this adjustment.  */
9448 gcc_assert (m
->fs
.cfa_reg
== src
);
9449 m
->fs
.cfa_offset
+= INTVAL (offset
);
9450 m
->fs
.cfa_reg
= dest
;
9452 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9453 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9454 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9455 RTX_FRAME_RELATED_P (insn
) = 1;
9459 RTX_FRAME_RELATED_P (insn
) = 1;
9460 if (add_frame_related_expr
)
9462 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9463 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9464 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
/* Keep the sp_offset/sp_valid tracking in sync with what was emitted.  */
9468 if (dest
== stack_pointer_rtx
)
9470 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9471 bool valid
= m
->fs
.sp_valid
;
9473 if (src
== hard_frame_pointer_rtx
)
9475 valid
= m
->fs
.fp_valid
;
9476 ooffset
= m
->fs
.fp_offset
;
9478 else if (src
== crtl
->drap_reg
)
9480 valid
= m
->fs
.drap_valid
;
9485 /* Else there are two possibilities: SP itself, which we set
9486 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9487 taken care of this by hand along the eh_return path. */
9488 gcc_checking_assert (src
== stack_pointer_rtx
9489 || offset
== const0_rtx
);
9492 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9493 m
->fs
.sp_valid
= valid
;
/* NOTE(review): lossy extraction -- return statements, the TARGET_64BIT
   split and braces are missing; comments only, code bytes untouched.  */
9497 /* Find an available register to be used as dynamic realign argument
9498 pointer register. Such a register will be written in prologue and
9499 used in begin of body, so it must not be
9500 1. parameter passing register.
9502 We reuse static-chain register if it is available. Otherwise, we
9503 use DI for i386 and R13 for x86-64. We chose R13 since it has
9506 Return: the regno of chosen register. */
9509 find_drap_reg (void)
9511 tree decl
= cfun
->decl
;
/* 64-bit branch (presumably; the TARGET_64BIT test line is missing).  */
9515 /* Use R13 for nested function or function need static chain.
9516 Since function with tail call may use any caller-saved
9517 registers in epilogue, DRAP must not use caller-saved
9518 register in such case. */
9519 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9526 /* Use DI for nested function or function need static chain.
9527 Since function with tail call may use any caller-saved
9528 registers in epilogue, DRAP must not use caller-saved
9529 register in such case. */
9530 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9533 /* Reuse static chain register if it isn't used for parameter
9535 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
/* fastcall/thiscall use ecx for arguments, so it cannot be the DRAP.  */
9537 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9538 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
/* NOTE(review): lossy extraction -- declaration line, the !TARGET_64BIT
   && !sibcall guard before line 9560, and an `else` before 9564 are
   missing; comments only, code bytes untouched.  */
/* ix86_minimum_incoming_stack_boundary: compute the minimum stack
   alignment (in bits) this function may assume on entry, considering the
   -mpreferred-stack-boundary user override, -mstackrealign, the
   force_align_arg_pointer attribute, parm_stack_boundary, and the special
   runtime-aligned case of main().  */
9545 /* Return minimum incoming stack alignment. */
9548 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9550 unsigned int incoming_stack_boundary
;
9552 /* Prefer the one specified at command line. */
9553 if (ix86_user_incoming_stack_boundary
)
9554 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9555 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9556 if -mstackrealign is used, it isn't used for sibcall check and
9557 estimated stack alignment is 128bit. */
9560 && ix86_force_align_arg_pointer
9561 && crtl
->stack_alignment_estimated
== 128)
9562 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9564 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9566 /* Incoming stack alignment can be changed on individual functions
9567 via force_align_arg_pointer attribute. We use the smallest
9568 incoming stack boundary. */
9569 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9570 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9571 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9572 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9574 /* The incoming stack frame has to be aligned at least at
9575 parm_stack_boundary. */
9576 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9577 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9579 /* Stack at entrance of main is aligned by runtime. We use the
9580 smallest incoming stack boundary. */
9581 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9582 && DECL_NAME (current_function_decl
)
9583 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9584 && DECL_FILE_SCOPE_P (current_function_decl
))
9585 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9587 return incoming_stack_boundary
;
/* NOTE(review): lossy extraction -- declaration line and the TARGET_64BIT
   && stdarg condition before line 9602 are missing; comments only.  */
/* ix86_update_stack_boundary: refresh ix86_incoming_stack_boundary and
   raise the estimated stack alignment to 128 bits for the x86-64 varargs
   register-save area.  */
9590 /* Update incoming stack boundary and estimated stack alignment. */
9593 ix86_update_stack_boundary (void)
9595 ix86_incoming_stack_boundary
9596 = ix86_minimum_incoming_stack_boundary (false);
9598 /* x86_64 vararg needs 16byte stack alignment for register save
9602 && crtl
->stack_alignment_estimated
< 128)
9603 crtl
->stack_alignment_estimated
= 128;
/* NOTE(review): lossy extraction -- declaration, start_sequence/end_sequence
   around the copy, return statements and braces are missing; comments
   only, code bytes untouched.  */
/* ix86_get_drap_rtx: TARGET_GET_DRAP_RTX hook.  When dynamic realignment
   via DRAP is in effect, pick the DRAP hard register, record it in
   crtl->drap_reg, copy it into a virtual register inserted at function
   entry (noted with REG_CFA_SET_VDRAP), and return that vDRAP.  */
9606 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9607 needed or an rtx for DRAP otherwise. */
9610 ix86_get_drap_rtx (void)
9612 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9613 crtl
->need_drap
= true;
9615 if (stack_realign_drap
)
9617 /* Assign DRAP to vDRAP and returns vDRAP */
9618 unsigned int regno
= find_drap_reg ();
9623 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9624 crtl
->drap_reg
= arg_ptr
;
9627 drap_vreg
= copy_to_reg (arg_ptr
);
/* Insert the DRAP->vDRAP copy right after the function entry point.  */
9631 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9634 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9635 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): declaration line missing from this lossy extraction.  */
/* ix86_internal_arg_pointer: TARGET_INTERNAL_ARG_POINTER hook -- x86 just
   uses the default virtual incoming-arguments pointer.  */
9643 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9646 ix86_internal_arg_pointer (void)
9648 return virtual_incoming_args_rtx
;
/* Scratch-register descriptor for the stack-probing helpers.
   NOTE(review): member declarations (the reg rtx and a saved flag) were
   dropped by the extraction.  */
9651 struct scratch_reg
{
/* NOTE(review): lossy extraction -- the 64-bit regno = R11 assignment,
   `regno = ...` targets of each else-if arm, braces and the push guard
   are missing; comments only, code bytes untouched.  */
9656 /* Return a short-lived scratch register for use on function entry.
9657 In 32-bit mode, it is valid only after the registers are saved
9658 in the prologue. This register must be released by means of
9659 release_scratch_register_on_entry once it is dead. */
9662 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9670 /* We always use R11 in 64-bit mode. */
/* 32-bit: pick a register not used for parameter passing, the static
   chain, or the DRAP, preferring a caller-saved one we need not push.  */
9675 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9677 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9678 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9679 int regparm
= ix86_function_regparm (fntype
, decl
);
9681 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9683 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9684 for the static chain register. */
9685 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9686 && drap_regno
!= AX_REG
)
9688 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9690 /* ecx is the static chain register. */
9691 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9692 && drap_regno
!= CX_REG
)
9694 else if (ix86_save_reg (BX_REG
, true))
9696 /* esi is the static chain register. */
9697 else if (!(regparm
== 3 && static_chain_p
)
9698 && ix86_save_reg (SI_REG
, true))
9700 else if (ix86_save_reg (DI_REG
, true))
/* Last resort: take eax or edx and spill it with a push below.  */
9704 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9709 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9712 rtx insn
= emit_insn (gen_push (sr
->reg
));
9713 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): lossy extraction -- declaration line and the sr->saved
   guard around the pop are missing; comments only, code bytes untouched.  */
/* release_scratch_register_on_entry: pop the scratch register pushed by
   get_scratch_register_on_entry, attaching an explicit
   REG_FRAME_RELATED_EXPR (SP = SP + UNITS_PER_WORD) because dwarf2out
   does not understand pop by itself.  */
9717 /* Release a scratch register obtained from the preceding function. */
9720 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9724 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9726 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9727 RTX_FRAME_RELATED_P (insn
) = 1;
9728 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9729 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9730 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Distance between successive stack probes, from -fstack-check tuning.  */
9734 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
/* NOTE(review): lossy extraction -- braces, several `if` headers and the
   -adjust operands of some plus_constant calls are missing; comments
   only, code bytes untouched.  */
/* ix86_adjust_stack_and_probe: decrement SP by SIZE while touching each
   PROBE_INTERVAL page, unrolled for small sizes and via the
   adjust_stack_and_probe loop pattern otherwise; finally updates
   m->fs.sp_offset and emits a blockage.  */
9736 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9739 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9741 /* We skip the probe for the first interval + a small dope of 4 words and
9742 probe that many bytes past the specified size to maintain a protection
9743 area at the bottom of the stack. */
9744 const int dope
= 4 * UNITS_PER_WORD
;
9745 rtx size_rtx
= GEN_INT (size
), last
;
9747 /* See if we have a constant small number of probes to generate. If so,
9748 that's the easy case. The run-time loop is made up of 11 insns in the
9749 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9750 for n # of intervals. */
9751 if (size
<= 5 * PROBE_INTERVAL
)
9753 HOST_WIDE_INT i
, adjust
;
9754 bool first_probe
= true;
9756 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9757 values of N from 1 until it exceeds SIZE. If only one probe is
9758 needed, this will not generate any code. Then adjust and probe
9759 to PROBE_INTERVAL + SIZE. */
9760 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
/* First iteration also covers the skipped first interval + dope.  */
9764 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9765 first_probe
= false;
9768 adjust
= PROBE_INTERVAL
;
9770 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9771 plus_constant (Pmode
, stack_pointer_rtx
,
9773 emit_stack_probe (stack_pointer_rtx
);
9777 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9779 adjust
= size
+ PROBE_INTERVAL
- i
;
9781 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9782 plus_constant (Pmode
, stack_pointer_rtx
,
9784 emit_stack_probe (stack_pointer_rtx
);
9786 /* Adjust back to account for the additional first interval. */
9787 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9788 plus_constant (Pmode
, stack_pointer_rtx
,
9789 PROBE_INTERVAL
+ dope
)));
9792 /* Otherwise, do the same as above, but in a loop. Note that we must be
9793 extra careful with variables wrapping around because we might be at
9794 the very top (or the very bottom) of the address space and we have
9795 to be able to handle this case properly; in particular, we use an
9796 equality test for the loop condition. */
9799 HOST_WIDE_INT rounded_size
;
9800 struct scratch_reg sr
;
9802 get_scratch_register_on_entry (&sr
);
9805 /* Step 1: round SIZE to the previous multiple of the interval. */
9807 rounded_size
= size
& -PROBE_INTERVAL
;
9810 /* Step 2: compute initial and final value of the loop counter. */
9812 /* SP = SP_0 + PROBE_INTERVAL. */
9813 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9814 plus_constant (Pmode
, stack_pointer_rtx
,
9815 - (PROBE_INTERVAL
+ dope
))));
9817 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9818 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9819 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9820 gen_rtx_PLUS (Pmode
, sr
.reg
,
9821 stack_pointer_rtx
)));
/* Step 3: the runtime loop (pseudo-code of the emitted pattern):  */
9826 while (SP != LAST_ADDR)
9828 SP = SP + PROBE_INTERVAL
9832 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9833 values of N from 1 until it is equal to ROUNDED_SIZE. */
9835 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9838 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9839 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9841 if (size
!= rounded_size
)
9843 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9844 plus_constant (Pmode
, stack_pointer_rtx
,
9845 rounded_size
- size
)));
9846 emit_stack_probe (stack_pointer_rtx
);
9849 /* Adjust back to account for the additional first interval. */
9850 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9851 plus_constant (Pmode
, stack_pointer_rtx
,
9852 PROBE_INTERVAL
+ dope
)));
9854 release_scratch_register_on_entry (&sr
);
/* This path only runs when SP is not the CFA register.  */
9857 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9859 /* Even if the stack pointer isn't the CFA register, we need to correctly
9860 describe the adjustments made to it, in particular differentiate the
9861 frame-related ones from the frame-unrelated ones. */
9864 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9865 XVECEXP (expr
, 0, 0)
9866 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9867 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9868 XVECEXP (expr
, 0, 1)
9869 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9870 plus_constant (Pmode
, stack_pointer_rtx
,
9871 PROBE_INTERVAL
+ dope
+ size
));
9872 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9873 RTX_FRAME_RELATED_P (last
) = 1;
9875 cfun
->machine
->fs
.sp_offset
+= size
;
9878 /* Make sure nothing is scheduled before we are done. */
9879 emit_insn (gen_blockage ());
/* NOTE(review): lossy extraction -- declaration, the xops array
   declaration, xops[1] = reg, and the probe comment line are missing;
   comments only, code bytes untouched.  */
/* output_adjust_stack_and_probe: emit the assembly text for the runtime
   adjust-and-probe loop (compare SP with REG, subtract PROBE_INTERVAL,
   `or $0` the new top-of-stack word to touch the page, repeat).  */
9882 /* Adjust the stack pointer up to REG while probing it. */
9885 output_adjust_stack_and_probe (rtx reg
)
9887 static int labelno
= 0;
9888 char loop_lab
[32], end_lab
[32];
9891 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9892 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9894 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9896 /* Jump to END_LAB if SP == LAST_ADDR. */
9897 xops
[0] = stack_pointer_rtx
;
9899 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9900 fputs ("\tje\t", asm_out_file
);
9901 assemble_name_raw (asm_out_file
, end_lab
);
9902 fputc ('\n', asm_out_file
);
9904 /* SP = SP + PROBE_INTERVAL. */
9905 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9906 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
/* Probe: `or $0, (sp)` touches the page without changing its contents.  */
9909 xops
[1] = const0_rtx
;
9910 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9912 fprintf (asm_out_file
, "\tjmp\t");
9913 assemble_name_raw (asm_out_file
, loop_lab
);
9914 fputc ('\n', asm_out_file
);
9916 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
/* NOTE(review): lossy extraction -- braces, loop-body offsets of the
   plus_constant probes and parts of the Step-4 address are missing;
   comments only, code bytes untouched.  */
/* ix86_emit_probe_stack_range: probe (without moving SP) every
   PROBE_INTERVAL between SP+FIRST and SP+FIRST+SIZE, unrolled for small
   sizes, otherwise via the probe_stack_range loop pattern using a scratch
   register as the negated test offset.  */
9921 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9922 inclusive. These are offsets from the current stack pointer. */
9925 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9927 /* See if we have a constant small number of probes to generate. If so,
9928 that's the easy case. The run-time loop is made up of 7 insns in the
9929 generic case while the compile-time loop is made up of n insns for n #
9931 if (size
<= 7 * PROBE_INTERVAL
)
9935 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9936 it exceeds SIZE. If only one probe is needed, this will not
9937 generate any code. Then probe at FIRST + SIZE. */
9938 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9939 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9942 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9946 /* Otherwise, do the same as above, but in a loop. Note that we must be
9947 extra careful with variables wrapping around because we might be at
9948 the very top (or the very bottom) of the address space and we have
9949 to be able to handle this case properly; in particular, we use an
9950 equality test for the loop condition. */
9953 HOST_WIDE_INT rounded_size
, last
;
9954 struct scratch_reg sr
;
9956 get_scratch_register_on_entry (&sr
);
9959 /* Step 1: round SIZE to the previous multiple of the interval. */
9961 rounded_size
= size
& -PROBE_INTERVAL
;
9964 /* Step 2: compute initial and final value of the loop counter. */
9966 /* TEST_OFFSET = FIRST. */
9967 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9969 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9970 last
= first
+ rounded_size
;
/* Step 3: runtime loop (pseudo-code of the emitted pattern):  */
9975 while (TEST_ADDR != LAST_ADDR)
9977 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9981 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9982 until it is equal to ROUNDED_SIZE. */
9984 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
9987 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9988 that SIZE is equal to ROUNDED_SIZE. */
9990 if (size
!= rounded_size
)
9991 emit_stack_probe (plus_constant (Pmode
,
9992 gen_rtx_PLUS (Pmode
,
9995 rounded_size
- size
));
9997 release_scratch_register_on_entry (&sr
);
10000 /* Make sure nothing is scheduled before we are done. */
10001 emit_insn (gen_blockage ());
/* NOTE(review): lossy extraction -- declaration, the xops array
   declaration, and the xops[0]/xops[1] loads for the comparison are
   missing; comments only, code bytes untouched.  */
/* output_probe_stack_range: emit the assembly text for the runtime
   probe-range loop: compare the test offset in REG with END, step it by
   PROBE_INTERVAL, and `or $0` at SP+offset to touch each page.  */
10004 /* Probe a range of stack addresses from REG to END, inclusive. These are
10005 offsets from the current stack pointer. */
10008 output_probe_stack_range (rtx reg
, rtx end
)
10010 static int labelno
= 0;
10011 char loop_lab
[32], end_lab
[32];
10014 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10015 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10017 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10019 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10022 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10023 fputs ("\tje\t", asm_out_file
);
10024 assemble_name_raw (asm_out_file
, end_lab
);
10025 fputc ('\n', asm_out_file
);
10027 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10028 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10029 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10031 /* Probe at TEST_ADDR. */
10032 xops
[0] = stack_pointer_rtx
;
10034 xops
[2] = const0_rtx
;
10035 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10037 fprintf (asm_out_file
, "\tjmp\t");
10038 assemble_name_raw (asm_out_file
, loop_lab
);
10039 fputc ('\n', asm_out_file
);
10041 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
/* NOTE(review): lossy extraction -- declaration, several condition heads
   (e.g. the stack_realign test before line 10074, FOR_EACH_BB and goto
   targets) and braces are missing; comments only, code bytes untouched.  */
/* ix86_finalize_stack_realign_flags: after reload, decide once and for
   all whether this function really needs stack realignment and a frame
   pointer; if realignment was only assumed conservatively and no insn
   requires a stack frame, downgrade the alignment bookkeeping and rescan
   register liveness.  Result is cached via crtl->stack_realign_finalized.  */
10046 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10047 to be generated in correct form. */
10049 ix86_finalize_stack_realign_flags (void)
10051 /* Check if stack realign is really needed after reload, and
10052 stores result in cfun */
10053 unsigned int incoming_stack_boundary
10054 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10055 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10056 unsigned int stack_realign
= (incoming_stack_boundary
10057 < (current_function_is_leaf
10058 ? crtl
->max_used_stack_slot_alignment
10059 : crtl
->stack_alignment_needed
));
10061 if (crtl
->stack_realign_finalized
)
10063 /* After stack_realign_needed is finalized, we can no longer
10065 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10069 /* If the only reason for frame_pointer_needed is that we conservatively
10070 assumed stack realignment might be needed, but in the end nothing that
10071 needed the stack alignment had been spilled, clear frame_pointer_needed
10072 and say we don't need stack realignment. */
10074 && !crtl
->need_drap
10075 && frame_pointer_needed
10076 && current_function_is_leaf
10077 && flag_omit_frame_pointer
10078 && current_function_sp_is_unchanging
10079 && !ix86_current_function_calls_tls_descriptor
10080 && !crtl
->accesses_prior_frames
10081 && !cfun
->calls_alloca
10082 && !crtl
->calls_eh_return
10083 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10084 && !ix86_frame_pointer_required ()
10085 && get_frame_size () == 0
10086 && ix86_nsaved_sseregs () == 0
10087 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10089 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10092 CLEAR_HARD_REG_SET (prologue_used
);
10093 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10094 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10095 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10096 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10097 HARD_FRAME_POINTER_REGNUM
);
/* If any insn actually requires a stack frame, realignment stands.  */
10101 FOR_BB_INSNS (bb
, insn
)
10102 if (NONDEBUG_INSN_P (insn
)
10103 && requires_stack_frame_p (insn
, prologue_used
,
10104 set_up_by_prologue
))
10106 crtl
->stack_realign_needed
= stack_realign
;
10107 crtl
->stack_realign_finalized
= true;
/* Nothing needed the alignment: drop the frame pointer and realignment
   and clamp all alignment bookkeeping to the incoming boundary.  */
10112 frame_pointer_needed
= false;
10113 stack_realign
= false;
10114 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10115 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10116 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10117 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10118 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
/* Hard-reg usage changed: redo the dataflow scan.  */
10119 df_finish_pass (true);
10120 df_scan_alloc (NULL
);
10122 df_compute_regs_ever_live (true);
10126 crtl
->stack_realign_needed
= stack_realign
;
10127 crtl
->stack_realign_finalized
= true;
10130 /* Expand the prologue into a bunch of separate insns. */
10133 ix86_expand_prologue (void)
10135 struct machine_function
*m
= cfun
->machine
;
10138 struct ix86_frame frame
;
10139 HOST_WIDE_INT allocate
;
10140 bool int_registers_saved
;
10142 ix86_finalize_stack_realign_flags ();
10144 /* DRAP should not coexist with stack_realign_fp */
10145 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10147 memset (&m
->fs
, 0, sizeof (m
->fs
));
10149 /* Initialize CFA state for before the prologue. */
10150 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10151 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10153 /* Track SP offset to the CFA. We continue tracking this after we've
10154 swapped the CFA register away from SP. In the case of re-alignment
10155 this is fudged; we're interested to offsets within the local frame. */
10156 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10157 m
->fs
.sp_valid
= true;
10159 ix86_compute_frame_layout (&frame
);
10161 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10163 /* We should have already generated an error for any use of
10164 ms_hook on a nested function. */
10165 gcc_checking_assert (!ix86_static_chain_on_stack
);
10167 /* Check if profiling is active and we shall use profiling before
10168 prologue variant. If so sorry. */
10169 if (crtl
->profile
&& flag_fentry
!= 0)
10170 sorry ("ms_hook_prologue attribute isn%'t compatible "
10171 "with -mfentry for 32-bit");
10173 /* In ix86_asm_output_function_label we emitted:
10174 8b ff movl.s %edi,%edi
10176 8b ec movl.s %esp,%ebp
10178 This matches the hookable function prologue in Win32 API
10179 functions in Microsoft Windows XP Service Pack 2 and newer.
10180 Wine uses this to enable Windows apps to hook the Win32 API
10181 functions provided by Wine.
10183 What that means is that we've already set up the frame pointer. */
10185 if (frame_pointer_needed
10186 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10190 /* We've decided to use the frame pointer already set up.
10191 Describe this to the unwinder by pretending that both
10192 push and mov insns happen right here.
10194 Putting the unwind info here at the end of the ms_hook
10195 is done so that we can make absolutely certain we get
10196 the required byte sequence at the start of the function,
10197 rather than relying on an assembler that can produce
10198 the exact encoding required.
10200 However it does mean (in the unpatched case) that we have
10201 a 1 insn window where the asynchronous unwind info is
10202 incorrect. However, if we placed the unwind info at
10203 its correct location we would have incorrect unwind info
10204 in the patched case. Which is probably all moot since
10205 I don't expect Wine generates dwarf2 unwind info for the
10206 system libraries that use this feature. */
10208 insn
= emit_insn (gen_blockage ());
10210 push
= gen_push (hard_frame_pointer_rtx
);
10211 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10212 stack_pointer_rtx
);
10213 RTX_FRAME_RELATED_P (push
) = 1;
10214 RTX_FRAME_RELATED_P (mov
) = 1;
10216 RTX_FRAME_RELATED_P (insn
) = 1;
10217 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10218 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10220 /* Note that gen_push incremented m->fs.cfa_offset, even
10221 though we didn't emit the push insn here. */
10222 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10223 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10224 m
->fs
.fp_valid
= true;
10228 /* The frame pointer is not needed so pop %ebp again.
10229 This leaves us with a pristine state. */
10230 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10234 /* The first insn of a function that accepts its static chain on the
10235 stack is to push the register that would be filled in by a direct
10236 call. This insn will be skipped by the trampoline. */
10237 else if (ix86_static_chain_on_stack
)
10239 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10240 emit_insn (gen_blockage ());
10242 /* We don't want to interpret this push insn as a register save,
10243 only as a stack adjustment. The real copy of the register as
10244 a save will be done later, if needed. */
10245 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10246 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10247 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10248 RTX_FRAME_RELATED_P (insn
) = 1;
10251 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10252 of DRAP is needed and stack realignment is really needed after reload */
10253 if (stack_realign_drap
)
10255 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10257 /* Only need to push parameter pointer reg if it is caller saved. */
10258 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10260 /* Push arg pointer reg */
10261 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10262 RTX_FRAME_RELATED_P (insn
) = 1;
10265 /* Grab the argument pointer. */
10266 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10267 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10268 RTX_FRAME_RELATED_P (insn
) = 1;
10269 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10270 m
->fs
.cfa_offset
= 0;
10272 /* Align the stack. */
10273 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10275 GEN_INT (-align_bytes
)));
10276 RTX_FRAME_RELATED_P (insn
) = 1;
10278 /* Replicate the return address on the stack so that return
10279 address can be reached via (argp - 1) slot. This is needed
10280 to implement macro RETURN_ADDR_RTX and intrinsic function
10281 expand_builtin_return_addr etc. */
10282 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10283 t
= gen_frame_mem (word_mode
, t
);
10284 insn
= emit_insn (gen_push (t
));
10285 RTX_FRAME_RELATED_P (insn
) = 1;
10287 /* For the purposes of frame and register save area addressing,
10288 we've started over with a new frame. */
10289 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10290 m
->fs
.realigned
= true;
10293 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10295 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10296 slower on all targets. Also sdb doesn't like it. */
10297 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10298 RTX_FRAME_RELATED_P (insn
) = 1;
10300 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10302 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10303 RTX_FRAME_RELATED_P (insn
) = 1;
10305 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10306 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10307 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10308 m
->fs
.fp_valid
= true;
10312 int_registers_saved
= (frame
.nregs
== 0);
10314 if (!int_registers_saved
)
10316 /* If saving registers via PUSH, do so now. */
10317 if (!frame
.save_regs_using_mov
)
10319 ix86_emit_save_regs ();
10320 int_registers_saved
= true;
10321 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10324 /* When using red zone we may start register saving before allocating
10325 the stack frame saving one cycle of the prologue. However, avoid
10326 doing this if we have to probe the stack; at least on x86_64 the
10327 stack probe can turn into a call that clobbers a red zone location. */
10328 else if (ix86_using_red_zone ()
10329 && (! TARGET_STACK_PROBE
10330 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10332 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10333 int_registers_saved
= true;
10337 if (stack_realign_fp
)
10339 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10340 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10342 /* The computation of the size of the re-aligned stack frame means
10343 that we must allocate the size of the register save area before
10344 performing the actual alignment. Otherwise we cannot guarantee
10345 that there's enough storage above the realignment point. */
10346 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10347 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10348 GEN_INT (m
->fs
.sp_offset
10349 - frame
.sse_reg_save_offset
),
10352 /* Align the stack. */
10353 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10355 GEN_INT (-align_bytes
)));
10357 /* For the purposes of register save area addressing, the stack
10358 pointer is no longer valid. As for the value of sp_offset,
10359 see ix86_compute_frame_layout, which we need to match in order
10360 to pass verification of stack_pointer_offset at the end. */
10361 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10362 m
->fs
.sp_valid
= false;
10365 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10367 if (flag_stack_usage_info
)
10369 /* We start to count from ARG_POINTER. */
10370 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10372 /* If it was realigned, take into account the fake frame. */
10373 if (stack_realign_drap
)
10375 if (ix86_static_chain_on_stack
)
10376 stack_size
+= UNITS_PER_WORD
;
10378 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10379 stack_size
+= UNITS_PER_WORD
;
10381 /* This over-estimates by 1 minimal-stack-alignment-unit but
10382 mitigates that by counting in the new return address slot. */
10383 current_function_dynamic_stack_size
10384 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10387 current_function_static_stack_size
= stack_size
;
10390 /* The stack has already been decremented by the instruction calling us
10391 so probe if the size is non-negative to preserve the protection area. */
10392 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10394 /* We expect the registers to be saved when probes are used. */
10395 gcc_assert (int_registers_saved
);
10397 if (STACK_CHECK_MOVING_SP
)
10399 ix86_adjust_stack_and_probe (allocate
);
10404 HOST_WIDE_INT size
= allocate
;
10406 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10407 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10409 if (TARGET_STACK_PROBE
)
10410 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10412 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10418 else if (!ix86_target_stack_probe ()
10419 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10421 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10422 GEN_INT (-allocate
), -1,
10423 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10427 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10429 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10431 bool eax_live
= false;
10432 bool r10_live
= false;
10435 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10436 if (!TARGET_64BIT_MS_ABI
)
10437 eax_live
= ix86_eax_live_at_start_p ();
10441 emit_insn (gen_push (eax
));
10442 allocate
-= UNITS_PER_WORD
;
10446 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10447 emit_insn (gen_push (r10
));
10448 allocate
-= UNITS_PER_WORD
;
10451 emit_move_insn (eax
, GEN_INT (allocate
));
10452 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10454 /* Use the fact that AX still contains ALLOCATE. */
10455 adjust_stack_insn
= (Pmode
== DImode
10456 ? gen_pro_epilogue_adjust_stack_di_sub
10457 : gen_pro_epilogue_adjust_stack_si_sub
);
10459 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10460 stack_pointer_rtx
, eax
));
10462 /* Note that SEH directives need to continue tracking the stack
10463 pointer even after the frame pointer has been set up. */
10464 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10466 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10467 m
->fs
.cfa_offset
+= allocate
;
10469 RTX_FRAME_RELATED_P (insn
) = 1;
10470 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10471 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10472 plus_constant (Pmode
, stack_pointer_rtx
,
10475 m
->fs
.sp_offset
+= allocate
;
10477 if (r10_live
&& eax_live
)
10479 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10480 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10481 gen_frame_mem (word_mode
, t
));
10482 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10483 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10484 gen_frame_mem (word_mode
, t
));
10486 else if (eax_live
|| r10_live
)
10488 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10489 emit_move_insn (gen_rtx_REG (word_mode
,
10490 (eax_live
? AX_REG
: R10_REG
)),
10491 gen_frame_mem (word_mode
, t
));
10494 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10496 /* If we havn't already set up the frame pointer, do so now. */
10497 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10499 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10500 GEN_INT (frame
.stack_pointer_offset
10501 - frame
.hard_frame_pointer_offset
));
10502 insn
= emit_insn (insn
);
10503 RTX_FRAME_RELATED_P (insn
) = 1;
10504 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10506 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10507 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10508 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10509 m
->fs
.fp_valid
= true;
10512 if (!int_registers_saved
)
10513 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10514 if (frame
.nsseregs
)
10515 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10517 pic_reg_used
= false;
10518 if (pic_offset_table_rtx
10519 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10522 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10524 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10525 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10527 pic_reg_used
= true;
10534 if (ix86_cmodel
== CM_LARGE_PIC
)
10536 rtx label
, tmp_reg
;
10538 gcc_assert (Pmode
== DImode
);
10539 label
= gen_label_rtx ();
10540 emit_label (label
);
10541 LABEL_PRESERVE_P (label
) = 1;
10542 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10543 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10544 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10546 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10547 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10548 pic_offset_table_rtx
, tmp_reg
));
10551 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10555 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10556 RTX_FRAME_RELATED_P (insn
) = 1;
10557 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10561 /* In the pic_reg_used case, make sure that the got load isn't deleted
10562 when mcount needs it. Blockage to avoid call movement across mcount
10563 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10565 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10566 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10568 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10570 /* vDRAP is setup but after reload it turns out stack realign
10571 isn't necessary, here we will emit prologue to setup DRAP
10572 without stack realign adjustment */
10573 t
= choose_baseaddr (0);
10574 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10577 /* Prevent instructions from being scheduled into register save push
10578 sequence when access to the redzone area is done through frame pointer.
10579 The offset between the frame pointer and the stack pointer is calculated
10580 relative to the value of the stack pointer at the end of the function
10581 prologue, and moving instructions that access redzone area via frame
10582 pointer inside push sequence violates this assumption. */
10583 if (frame_pointer_needed
&& frame
.red_zone_size
)
10584 emit_insn (gen_memory_blockage ());
10586 /* Emit cld instruction if stringops are used in the function. */
10587 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10588 emit_insn (gen_cld ());
10590 /* SEH requires that the prologue end within 256 bytes of the start of
10591 the function. Prevent instruction schedules that would extend that.
10592 Further, prevent alloca modifications to the stack pointer from being
10593 combined with prologue modifications. */
10595 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10598 /* Emit code to restore REG using a POP insn. */
10601 ix86_emit_restore_reg_using_pop (rtx reg
)
10603 struct machine_function
*m
= cfun
->machine
;
10604 rtx insn
= emit_insn (gen_pop (reg
));
10606 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10607 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10609 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10610 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10612 /* Previously we'd represented the CFA as an expression
10613 like *(%ebp - 8). We've just popped that value from
10614 the stack, which means we need to reset the CFA to
10615 the drap register. This will remain until we restore
10616 the stack pointer. */
10617 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10618 RTX_FRAME_RELATED_P (insn
) = 1;
10620 /* This means that the DRAP register is valid for addressing too. */
10621 m
->fs
.drap_valid
= true;
10625 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10627 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10628 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10629 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10630 RTX_FRAME_RELATED_P (insn
) = 1;
10632 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10635 /* When the frame pointer is the CFA, and we pop it, we are
10636 swapping back to the stack pointer as the CFA. This happens
10637 for stack frames that don't allocate other data, so we assume
10638 the stack pointer is now pointing at the return address, i.e.
10639 the function entry state, which makes the offset be 1 word. */
10640 if (reg
== hard_frame_pointer_rtx
)
10642 m
->fs
.fp_valid
= false;
10643 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10645 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10646 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10648 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10649 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10650 GEN_INT (m
->fs
.cfa_offset
)));
10651 RTX_FRAME_RELATED_P (insn
) = 1;
10656 /* Emit code to restore saved registers using POP insns. */
10659 ix86_emit_restore_regs_using_pop (void)
10661 unsigned int regno
;
10663 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10664 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10665 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10668 /* Emit code and notes for the LEAVE instruction. */
10671 ix86_emit_leave (void)
10673 struct machine_function
*m
= cfun
->machine
;
10674 rtx insn
= emit_insn (ix86_gen_leave ());
10676 ix86_add_queued_cfa_restore_notes (insn
);
10678 gcc_assert (m
->fs
.fp_valid
);
10679 m
->fs
.sp_valid
= true;
10680 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10681 m
->fs
.fp_valid
= false;
10683 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10685 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10686 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10688 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10689 plus_constant (Pmode
, stack_pointer_rtx
,
10691 RTX_FRAME_RELATED_P (insn
) = 1;
10693 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10697 /* Emit code to restore saved registers using MOV insns.
10698 First register is restored from CFA - CFA_OFFSET. */
10700 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10701 bool maybe_eh_return
)
10703 struct machine_function
*m
= cfun
->machine
;
10704 unsigned int regno
;
10706 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10707 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10709 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10712 mem
= choose_baseaddr (cfa_offset
);
10713 mem
= gen_frame_mem (word_mode
, mem
);
10714 insn
= emit_move_insn (reg
, mem
);
10716 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10718 /* Previously we'd represented the CFA as an expression
10719 like *(%ebp - 8). We've just popped that value from
10720 the stack, which means we need to reset the CFA to
10721 the drap register. This will remain until we restore
10722 the stack pointer. */
10723 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10724 RTX_FRAME_RELATED_P (insn
) = 1;
10726 /* This means that the DRAP register is valid for addressing. */
10727 m
->fs
.drap_valid
= true;
10730 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10732 cfa_offset
-= UNITS_PER_WORD
;
10736 /* Emit code to restore saved registers using MOV insns.
10737 First register is restored from CFA - CFA_OFFSET. */
10739 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10740 bool maybe_eh_return
)
10742 unsigned int regno
;
10744 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10745 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10747 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10750 mem
= choose_baseaddr (cfa_offset
);
10751 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10752 set_mem_align (mem
, 128);
10753 emit_move_insn (reg
, mem
);
10755 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10761 /* Emit vzeroupper if needed. */
10764 ix86_maybe_emit_epilogue_vzeroupper (void)
10766 if (TARGET_VZEROUPPER
10767 && !TREE_THIS_VOLATILE (cfun
->decl
)
10768 && !cfun
->machine
->caller_return_avx256_p
)
10769 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
10772 /* Restore function stack, frame, and registers. */
10775 ix86_expand_epilogue (int style
)
10777 struct machine_function
*m
= cfun
->machine
;
10778 struct machine_frame_state frame_state_save
= m
->fs
;
10779 struct ix86_frame frame
;
10780 bool restore_regs_via_mov
;
10783 ix86_finalize_stack_realign_flags ();
10784 ix86_compute_frame_layout (&frame
);
10786 m
->fs
.sp_valid
= (!frame_pointer_needed
10787 || (current_function_sp_is_unchanging
10788 && !stack_realign_fp
));
10789 gcc_assert (!m
->fs
.sp_valid
10790 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10792 /* The FP must be valid if the frame pointer is present. */
10793 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10794 gcc_assert (!m
->fs
.fp_valid
10795 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10797 /* We must have *some* valid pointer to the stack frame. */
10798 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10800 /* The DRAP is never valid at this point. */
10801 gcc_assert (!m
->fs
.drap_valid
);
10803 /* See the comment about red zone and frame
10804 pointer usage in ix86_expand_prologue. */
10805 if (frame_pointer_needed
&& frame
.red_zone_size
)
10806 emit_insn (gen_memory_blockage ());
10808 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10809 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10811 /* Determine the CFA offset of the end of the red-zone. */
10812 m
->fs
.red_zone_offset
= 0;
10813 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10815 /* The red-zone begins below the return address. */
10816 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10818 /* When the register save area is in the aligned portion of
10819 the stack, determine the maximum runtime displacement that
10820 matches up with the aligned frame. */
10821 if (stack_realign_drap
)
10822 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10826 /* Special care must be taken for the normal return case of a function
10827 using eh_return: the eax and edx registers are marked as saved, but
10828 not restored along this path. Adjust the save location to match. */
10829 if (crtl
->calls_eh_return
&& style
!= 2)
10830 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10832 /* EH_RETURN requires the use of moves to function properly. */
10833 if (crtl
->calls_eh_return
)
10834 restore_regs_via_mov
= true;
10835 /* SEH requires the use of pops to identify the epilogue. */
10836 else if (TARGET_SEH
)
10837 restore_regs_via_mov
= false;
10838 /* If we're only restoring one register and sp is not valid then
10839 using a move instruction to restore the register since it's
10840 less work than reloading sp and popping the register. */
10841 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10842 restore_regs_via_mov
= true;
10843 else if (TARGET_EPILOGUE_USING_MOVE
10844 && cfun
->machine
->use_fast_prologue_epilogue
10845 && (frame
.nregs
> 1
10846 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10847 restore_regs_via_mov
= true;
10848 else if (frame_pointer_needed
10850 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10851 restore_regs_via_mov
= true;
10852 else if (frame_pointer_needed
10853 && TARGET_USE_LEAVE
10854 && cfun
->machine
->use_fast_prologue_epilogue
10855 && frame
.nregs
== 1)
10856 restore_regs_via_mov
= true;
10858 restore_regs_via_mov
= false;
10860 if (restore_regs_via_mov
|| frame
.nsseregs
)
10862 /* Ensure that the entire register save area is addressable via
10863 the stack pointer, if we will restore via sp. */
10865 && m
->fs
.sp_offset
> 0x7fffffff
10866 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10867 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10869 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10870 GEN_INT (m
->fs
.sp_offset
10871 - frame
.sse_reg_save_offset
),
10873 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10877 /* If there are any SSE registers to restore, then we have to do it
10878 via moves, since there's obviously no pop for SSE regs. */
10879 if (frame
.nsseregs
)
10880 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10883 if (restore_regs_via_mov
)
10888 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10890 /* eh_return epilogues need %ecx added to the stack pointer. */
10893 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10895 /* Stack align doesn't work with eh_return. */
10896 gcc_assert (!stack_realign_drap
);
10897 /* Neither does regparm nested functions. */
10898 gcc_assert (!ix86_static_chain_on_stack
);
10900 if (frame_pointer_needed
)
10902 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10903 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10904 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10906 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10907 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10909 /* Note that we use SA as a temporary CFA, as the return
10910 address is at the proper place relative to it. We
10911 pretend this happens at the FP restore insn because
10912 prior to this insn the FP would be stored at the wrong
10913 offset relative to SA, and after this insn we have no
10914 other reasonable register to use for the CFA. We don't
10915 bother resetting the CFA to the SP for the duration of
10916 the return insn. */
10917 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10918 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
10919 ix86_add_queued_cfa_restore_notes (insn
);
10920 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10921 RTX_FRAME_RELATED_P (insn
) = 1;
10923 m
->fs
.cfa_reg
= sa
;
10924 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10925 m
->fs
.fp_valid
= false;
10927 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10928 const0_rtx
, style
, false);
10932 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10933 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10934 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10935 ix86_add_queued_cfa_restore_notes (insn
);
10937 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10938 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10940 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10941 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10942 plus_constant (Pmode
, stack_pointer_rtx
,
10944 RTX_FRAME_RELATED_P (insn
) = 1;
10947 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10948 m
->fs
.sp_valid
= true;
10953 /* SEH requires that the function end with (1) a stack adjustment
10954 if necessary, (2) a sequence of pops, and (3) a return or
10955 jump instruction. Prevent insns from the function body from
10956 being scheduled into this sequence. */
10959 /* Prevent a catch region from being adjacent to the standard
10960 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
10961 several other flags that would be interesting to test are
10963 if (flag_non_call_exceptions
)
10964 emit_insn (gen_nops (const1_rtx
));
10966 emit_insn (gen_blockage ());
10969 /* First step is to deallocate the stack frame so that we can
10970 pop the registers. */
10971 if (!m
->fs
.sp_valid
)
10973 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
10974 GEN_INT (m
->fs
.fp_offset
10975 - frame
.reg_save_offset
),
10978 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10980 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10981 GEN_INT (m
->fs
.sp_offset
10982 - frame
.reg_save_offset
),
10984 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10987 ix86_emit_restore_regs_using_pop ();
10990 /* If we used a stack pointer and haven't already got rid of it,
10992 if (m
->fs
.fp_valid
)
10994 /* If the stack pointer is valid and pointing at the frame
10995 pointer store address, then we only need a pop. */
10996 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
10997 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10998 /* Leave results in shorter dependency chains on CPUs that are
10999 able to grok it fast. */
11000 else if (TARGET_USE_LEAVE
11001 || optimize_function_for_size_p (cfun
)
11002 || !cfun
->machine
->use_fast_prologue_epilogue
)
11003 ix86_emit_leave ();
11006 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11007 hard_frame_pointer_rtx
,
11008 const0_rtx
, style
, !using_drap
);
11009 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11015 int param_ptr_offset
= UNITS_PER_WORD
;
11018 gcc_assert (stack_realign_drap
);
11020 if (ix86_static_chain_on_stack
)
11021 param_ptr_offset
+= UNITS_PER_WORD
;
11022 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11023 param_ptr_offset
+= UNITS_PER_WORD
;
11025 insn
= emit_insn (gen_rtx_SET
11026 (VOIDmode
, stack_pointer_rtx
,
11027 gen_rtx_PLUS (Pmode
,
11029 GEN_INT (-param_ptr_offset
))));
11030 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11031 m
->fs
.cfa_offset
= param_ptr_offset
;
11032 m
->fs
.sp_offset
= param_ptr_offset
;
11033 m
->fs
.realigned
= false;
11035 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11036 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11037 GEN_INT (param_ptr_offset
)));
11038 RTX_FRAME_RELATED_P (insn
) = 1;
11040 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11041 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11044 /* At this point the stack pointer must be valid, and we must have
11045 restored all of the registers. We may not have deallocated the
11046 entire stack frame. We've delayed this until now because it may
11047 be possible to merge the local stack deallocation with the
11048 deallocation forced by ix86_static_chain_on_stack. */
11049 gcc_assert (m
->fs
.sp_valid
);
11050 gcc_assert (!m
->fs
.fp_valid
);
11051 gcc_assert (!m
->fs
.realigned
);
11052 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11054 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11055 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11059 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11061 /* Sibcall epilogues don't want a return instruction. */
11064 m
->fs
= frame_state_save
;
11068 /* Emit vzeroupper if needed. */
11069 ix86_maybe_emit_epilogue_vzeroupper ();
11071 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11073 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11075 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11076 address, do explicit add, and jump indirectly to the caller. */
11078 if (crtl
->args
.pops_args
>= 65536)
11080 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11083 /* There is no "pascal" calling convention in any 64bit ABI. */
11084 gcc_assert (!TARGET_64BIT
);
11086 insn
= emit_insn (gen_pop (ecx
));
11087 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11088 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11090 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11091 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11092 add_reg_note (insn
, REG_CFA_REGISTER
,
11093 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11094 RTX_FRAME_RELATED_P (insn
) = 1;
11096 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11098 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11101 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11104 emit_jump_insn (gen_simple_return_internal ());
11106 /* Restore the state back to the state from the prologue,
11107 so that it's correct for the next epilogue. */
11108 m
->fs
= frame_state_save
;
11111 /* Reset from the function's potential modifications. */
11114 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11115 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11117 if (pic_offset_table_rtx
)
11118 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11120 /* Mach-O doesn't support labels at the end of objects, so if
11121 it looks like we might want one, insert a NOP. */
11123 rtx insn
= get_last_insn ();
11124 rtx deleted_debug_label
= NULL_RTX
;
11127 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11129 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11130 notes only, instead set their CODE_LABEL_NUMBER to -1,
11131 otherwise there would be code generation differences
11132 in between -g and -g0. */
11133 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11134 deleted_debug_label
= insn
;
11135 insn
= PREV_INSN (insn
);
11140 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11141 fputs ("\tnop\n", file
);
11142 else if (deleted_debug_label
)
11143 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11144 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11145 CODE_LABEL_NUMBER (insn
) = -1;
11151 /* Return a scratch register to use in the split stack prologue. The
11152 split stack prologue is used for -fsplit-stack. It is the first
11153 instructions in the function, even before the regular prologue.
11154 The scratch register can be any caller-saved register which is not
11155 used for parameters or for the static chain. */
11157 static unsigned int
11158 split_stack_prologue_scratch_regno (void)
11167 is_fastcall
= (lookup_attribute ("fastcall",
11168 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11170 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11174 if (DECL_STATIC_CHAIN (cfun
->decl
))
11176 sorry ("-fsplit-stack does not support fastcall with "
11177 "nested function");
11178 return INVALID_REGNUM
;
11182 else if (regparm
< 3)
11184 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11190 sorry ("-fsplit-stack does not support 2 register "
11191 " parameters for a nested function");
11192 return INVALID_REGNUM
;
11199 /* FIXME: We could make this work by pushing a register
11200 around the addition and comparison. */
11201 sorry ("-fsplit-stack does not support 3 register parameters");
11202 return INVALID_REGNUM
;
11207 /* A SYMBOL_REF for the function which allocates new stackspace for
11210 static GTY(()) rtx split_stack_fn
;
11212 /* A SYMBOL_REF for the more stack function when using the large
11215 static GTY(()) rtx split_stack_fn_large
;
11217 /* Handle -fsplit-stack. These are the first instructions in the
11218 function, even before the regular prologue. */
11221 ix86_expand_split_stack_prologue (void)
11223 struct ix86_frame frame
;
11224 HOST_WIDE_INT allocate
;
11225 unsigned HOST_WIDE_INT args_size
;
11226 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11227 rtx scratch_reg
= NULL_RTX
;
11228 rtx varargs_label
= NULL_RTX
;
11231 gcc_assert (flag_split_stack
&& reload_completed
);
11233 ix86_finalize_stack_realign_flags ();
11234 ix86_compute_frame_layout (&frame
);
11235 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11237 /* This is the label we will branch to if we have enough stack
11238 space. We expect the basic block reordering pass to reverse this
11239 branch if optimizing, so that we branch in the unlikely case. */
11240 label
= gen_label_rtx ();
11242 /* We need to compare the stack pointer minus the frame size with
11243 the stack boundary in the TCB. The stack boundary always gives
11244 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11245 can compare directly. Otherwise we need to do an addition. */
11247 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11248 UNSPEC_STACK_CHECK
);
11249 limit
= gen_rtx_CONST (Pmode
, limit
);
11250 limit
= gen_rtx_MEM (Pmode
, limit
);
11251 if (allocate
< SPLIT_STACK_AVAILABLE
)
11252 current
= stack_pointer_rtx
;
11255 unsigned int scratch_regno
;
11258 /* We need a scratch register to hold the stack pointer minus
11259 the required frame size. Since this is the very start of the
11260 function, the scratch register can be any caller-saved
11261 register which is not used for parameters. */
11262 offset
= GEN_INT (- allocate
);
11263 scratch_regno
= split_stack_prologue_scratch_regno ();
11264 if (scratch_regno
== INVALID_REGNUM
)
11266 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11267 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11269 /* We don't use ix86_gen_add3 in this case because it will
11270 want to split to lea, but when not optimizing the insn
11271 will not be split after this point. */
11272 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11273 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11278 emit_move_insn (scratch_reg
, offset
);
11279 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11280 stack_pointer_rtx
));
11282 current
= scratch_reg
;
11285 ix86_expand_branch (GEU
, current
, limit
, label
);
11286 jump_insn
= get_last_insn ();
11287 JUMP_LABEL (jump_insn
) = label
;
11289 /* Mark the jump as very likely to be taken. */
11290 add_reg_note (jump_insn
, REG_BR_PROB
,
11291 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11293 if (split_stack_fn
== NULL_RTX
)
11294 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11295 fn
= split_stack_fn
;
11297 /* Get more stack space. We pass in the desired stack space and the
11298 size of the arguments to copy to the new stack. In 32-bit mode
11299 we push the parameters; __morestack will return on a new stack
11300 anyhow. In 64-bit mode we pass the parameters in r10 and
11302 allocate_rtx
= GEN_INT (allocate
);
11303 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11304 call_fusage
= NULL_RTX
;
11309 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11310 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11312 /* If this function uses a static chain, it will be in %r10.
11313 Preserve it across the call to __morestack. */
11314 if (DECL_STATIC_CHAIN (cfun
->decl
))
11318 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11319 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11320 use_reg (&call_fusage
, rax
);
11323 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11325 HOST_WIDE_INT argval
;
11327 gcc_assert (Pmode
== DImode
);
11328 /* When using the large model we need to load the address
11329 into a register, and we've run out of registers. So we
11330 switch to a different calling convention, and we call a
11331 different function: __morestack_large. We pass the
11332 argument size in the upper 32 bits of r10 and pass the
11333 frame size in the lower 32 bits. */
11334 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11335 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11337 if (split_stack_fn_large
== NULL_RTX
)
11338 split_stack_fn_large
=
11339 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11341 if (ix86_cmodel
== CM_LARGE_PIC
)
11345 label
= gen_label_rtx ();
11346 emit_label (label
);
11347 LABEL_PRESERVE_P (label
) = 1;
11348 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11349 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11350 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11351 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11353 x
= gen_rtx_CONST (Pmode
, x
);
11354 emit_move_insn (reg11
, x
);
11355 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11356 x
= gen_const_mem (Pmode
, x
);
11357 emit_move_insn (reg11
, x
);
11360 emit_move_insn (reg11
, split_stack_fn_large
);
11364 argval
= ((args_size
<< 16) << 16) + allocate
;
11365 emit_move_insn (reg10
, GEN_INT (argval
));
11369 emit_move_insn (reg10
, allocate_rtx
);
11370 emit_move_insn (reg11
, GEN_INT (args_size
));
11371 use_reg (&call_fusage
, reg11
);
11374 use_reg (&call_fusage
, reg10
);
11378 emit_insn (gen_push (GEN_INT (args_size
)));
11379 emit_insn (gen_push (allocate_rtx
));
11381 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11382 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11384 add_function_usage_to (call_insn
, call_fusage
);
11386 /* In order to make call/return prediction work right, we now need
11387 to execute a return instruction. See
11388 libgcc/config/i386/morestack.S for the details on how this works.
11390 For flow purposes gcc must not see this as a return
11391 instruction--we need control flow to continue at the subsequent
11392 label. Therefore, we use an unspec. */
11393 gcc_assert (crtl
->args
.pops_args
< 65536);
11394 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11396 /* If we are in 64-bit mode and this function uses a static chain,
11397 we saved %r10 in %rax before calling _morestack. */
11398 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11399 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11400 gen_rtx_REG (word_mode
, AX_REG
));
11402 /* If this function calls va_start, we need to store a pointer to
11403 the arguments on the old stack, because they may not have been
11404 all copied to the new stack. At this point the old stack can be
11405 found at the frame pointer value used by __morestack, because
11406 __morestack has set that up before calling back to us. Here we
11407 store that pointer in a scratch register, and in
11408 ix86_expand_prologue we store the scratch register in a stack
11410 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11412 unsigned int scratch_regno
;
11416 scratch_regno
= split_stack_prologue_scratch_regno ();
11417 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11418 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11422 return address within this function
11423 return address of caller of this function
11425 So we add three words to get to the stack arguments.
11429 return address within this function
11430 first argument to __morestack
11431 second argument to __morestack
11432 return address of caller of this function
11434 So we add five words to get to the stack arguments.
11436 words
= TARGET_64BIT
? 3 : 5;
11437 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11438 gen_rtx_PLUS (Pmode
, frame_reg
,
11439 GEN_INT (words
* UNITS_PER_WORD
))));
11441 varargs_label
= gen_label_rtx ();
11442 emit_jump_insn (gen_jump (varargs_label
));
11443 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11448 emit_label (label
);
11449 LABEL_NUSES (label
) = 1;
11451 /* If this function calls va_start, we now have to set the scratch
11452 register for the case where we do not call __morestack. In this
11453 case we need to set it based on the stack pointer. */
11454 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11456 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11457 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11458 GEN_INT (UNITS_PER_WORD
))));
11460 emit_label (varargs_label
);
11461 LABEL_NUSES (varargs_label
) = 1;
11465 /* We may have to tell the dataflow pass that the split stack prologue
11466 is initializing a scratch register. */
11469 ix86_live_on_entry (bitmap regs
)
11471 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11473 gcc_assert (flag_split_stack
);
11474 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11478 /* Determine if op is suitable SUBREG RTX for address. */
11481 ix86_address_subreg_operand (rtx op
)
11483 enum machine_mode mode
;
11488 mode
= GET_MODE (op
);
11490 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11493 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11494 failures when the register is one word out of a two word structure. */
11495 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11498 /* Allow only SUBREGs of non-eliminable hard registers. */
11499 return register_no_elim_operand (op
, mode
);
11502 /* Extract the parts of an RTL expression that is a valid memory address
11503 for an instruction. Return 0 if the structure of the address is
11504 grossly off. Return -1 if the address contains ASHIFT, so it is not
11505 strictly valid, but still used for computing length of lea instruction. */
11508 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11510 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11511 rtx base_reg
, index_reg
;
11512 HOST_WIDE_INT scale
= 1;
11513 rtx scale_rtx
= NULL_RTX
;
11516 enum ix86_address_seg seg
= SEG_DEFAULT
;
11518 /* Allow zero-extended SImode addresses,
11519 they will be emitted with addr32 prefix. */
11520 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11522 if (GET_CODE (addr
) == ZERO_EXTEND
11523 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11524 addr
= XEXP (addr
, 0);
11525 else if (GET_CODE (addr
) == AND
11526 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11528 addr
= XEXP (addr
, 0);
11530 /* Adjust SUBREGs. */
11531 if (GET_CODE (addr
) == SUBREG
11532 && GET_MODE (SUBREG_REG (addr
)) == SImode
)
11533 addr
= SUBREG_REG (addr
);
11534 else if (GET_MODE (addr
) == DImode
)
11535 addr
= gen_rtx_SUBREG (SImode
, addr
, 0);
11536 else if (GET_MODE (addr
) != VOIDmode
)
11543 else if (GET_CODE (addr
) == SUBREG
)
11545 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11550 else if (GET_CODE (addr
) == PLUS
)
11552 rtx addends
[4], op
;
11560 addends
[n
++] = XEXP (op
, 1);
11563 while (GET_CODE (op
) == PLUS
);
11568 for (i
= n
; i
>= 0; --i
)
11571 switch (GET_CODE (op
))
11576 index
= XEXP (op
, 0);
11577 scale_rtx
= XEXP (op
, 1);
11583 index
= XEXP (op
, 0);
11584 tmp
= XEXP (op
, 1);
11585 if (!CONST_INT_P (tmp
))
11587 scale
= INTVAL (tmp
);
11588 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11590 scale
= 1 << scale
;
11595 if (GET_CODE (op
) != UNSPEC
)
11600 if (XINT (op
, 1) == UNSPEC_TP
11601 && TARGET_TLS_DIRECT_SEG_REFS
11602 && seg
== SEG_DEFAULT
)
11603 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11609 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11636 else if (GET_CODE (addr
) == MULT
)
11638 index
= XEXP (addr
, 0); /* index*scale */
11639 scale_rtx
= XEXP (addr
, 1);
11641 else if (GET_CODE (addr
) == ASHIFT
)
11643 /* We're called for lea too, which implements ashift on occasion. */
11644 index
= XEXP (addr
, 0);
11645 tmp
= XEXP (addr
, 1);
11646 if (!CONST_INT_P (tmp
))
11648 scale
= INTVAL (tmp
);
11649 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11651 scale
= 1 << scale
;
11655 disp
= addr
; /* displacement */
11661 else if (GET_CODE (index
) == SUBREG
11662 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11668 /* Address override works only on the (%reg) part of %fs:(%reg). */
11669 if (seg
!= SEG_DEFAULT
11670 && ((base
&& GET_MODE (base
) != word_mode
)
11671 || (index
&& GET_MODE (index
) != word_mode
)))
11674 /* Extract the integral value of scale. */
11677 if (!CONST_INT_P (scale_rtx
))
11679 scale
= INTVAL (scale_rtx
);
11682 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11683 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11685 /* Avoid useless 0 displacement. */
11686 if (disp
== const0_rtx
&& (base
|| index
))
11689 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11690 if (base_reg
&& index_reg
&& scale
== 1
11691 && (index_reg
== arg_pointer_rtx
11692 || index_reg
== frame_pointer_rtx
11693 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11696 tmp
= base
, base
= index
, index
= tmp
;
11697 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11700 /* Special case: %ebp cannot be encoded as a base without a displacement.
11704 && (base_reg
== hard_frame_pointer_rtx
11705 || base_reg
== frame_pointer_rtx
11706 || base_reg
== arg_pointer_rtx
11707 || (REG_P (base_reg
)
11708 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11709 || REGNO (base_reg
) == R13_REG
))))
11712 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11713 Avoid this by transforming to [%esi+0].
11714 Reload calls address legitimization without cfun defined, so we need
11715 to test cfun for being non-NULL. */
11716 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11717 && base_reg
&& !index_reg
&& !disp
11718 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11721 /* Special case: encode reg+reg instead of reg*2. */
11722 if (!base
&& index
&& scale
== 2)
11723 base
= index
, base_reg
= index_reg
, scale
= 1;
11725 /* Special case: scaling cannot be encoded without base or displacement. */
11726 if (!base
&& !disp
&& index
&& scale
!= 1)
11730 out
->index
= index
;
11732 out
->scale
= scale
;
11738 /* Return cost of the memory address x.
11739 For i386, it is better to use a complex address than let gcc copy
11740 the address into a reg and make a new pseudo. But not if the address
11741 requires to two regs - that would mean more pseudos with longer
11744 ix86_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
11746 struct ix86_address parts
;
11748 int ok
= ix86_decompose_address (x
, &parts
);
11752 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11753 parts
.base
= SUBREG_REG (parts
.base
);
11754 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11755 parts
.index
= SUBREG_REG (parts
.index
);
11757 /* Attempt to minimize number of registers in the address. */
11759 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11761 && (!REG_P (parts
.index
)
11762 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11766 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11768 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11769 && parts
.base
!= parts
.index
)
11772 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11773 since it's predecode logic can't detect the length of instructions
11774 and it degenerates to vector decoded. Increase cost of such
11775 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11776 to split such addresses or even refuse such addresses at all.
11778 Following addressing modes are affected:
11783 The first and last case may be avoidable by explicitly coding the zero in
11784 memory address, but I don't have AMD-K6 machine handy to check this
11788 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11789 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11790 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11796 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11797 this is used for to form addresses to local data when -fPIC is in
11801 darwin_local_data_pic (rtx disp
)
11803 return (GET_CODE (disp
) == UNSPEC
11804 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11807 /* Determine if a given RTX is a valid constant. We already know this
11808 satisfies CONSTANT_P. */
11811 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11813 switch (GET_CODE (x
))
11818 if (GET_CODE (x
) == PLUS
)
11820 if (!CONST_INT_P (XEXP (x
, 1)))
11825 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11828 /* Only some unspecs are valid as "constants". */
11829 if (GET_CODE (x
) == UNSPEC
)
11830 switch (XINT (x
, 1))
11833 case UNSPEC_GOTOFF
:
11834 case UNSPEC_PLTOFF
:
11835 return TARGET_64BIT
;
11837 case UNSPEC_NTPOFF
:
11838 x
= XVECEXP (x
, 0, 0);
11839 return (GET_CODE (x
) == SYMBOL_REF
11840 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11841 case UNSPEC_DTPOFF
:
11842 x
= XVECEXP (x
, 0, 0);
11843 return (GET_CODE (x
) == SYMBOL_REF
11844 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11849 /* We must have drilled down to a symbol. */
11850 if (GET_CODE (x
) == LABEL_REF
)
11852 if (GET_CODE (x
) != SYMBOL_REF
)
11857 /* TLS symbols are never valid. */
11858 if (SYMBOL_REF_TLS_MODEL (x
))
11861 /* DLLIMPORT symbols are never valid. */
11862 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11863 && SYMBOL_REF_DLLIMPORT_P (x
))
11867 /* mdynamic-no-pic */
11868 if (MACHO_DYNAMIC_NO_PIC_P
)
11869 return machopic_symbol_defined_p (x
);
11874 if (GET_MODE (x
) == TImode
11875 && x
!= CONST0_RTX (TImode
)
11881 if (!standard_sse_constant_p (x
))
11888 /* Otherwise we handle everything else in the move patterns. */
11892 /* Determine if it's legal to put X into the constant pool. This
11893 is not possible for the address of thread-local symbols, which
11894 is checked above. */
11897 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11899 /* We can always put integral constants and vectors in memory. */
11900 switch (GET_CODE (x
))
11910 return !ix86_legitimate_constant_p (mode
, x
);
11914 /* Nonzero if the constant value X is a legitimate general operand
11915 when generating PIC code. It is given that flag_pic is on and
11916 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11919 legitimate_pic_operand_p (rtx x
)
11923 switch (GET_CODE (x
))
11926 inner
= XEXP (x
, 0);
11927 if (GET_CODE (inner
) == PLUS
11928 && CONST_INT_P (XEXP (inner
, 1)))
11929 inner
= XEXP (inner
, 0);
11931 /* Only some unspecs are valid as "constants". */
11932 if (GET_CODE (inner
) == UNSPEC
)
11933 switch (XINT (inner
, 1))
11936 case UNSPEC_GOTOFF
:
11937 case UNSPEC_PLTOFF
:
11938 return TARGET_64BIT
;
11940 x
= XVECEXP (inner
, 0, 0);
11941 return (GET_CODE (x
) == SYMBOL_REF
11942 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11943 case UNSPEC_MACHOPIC_OFFSET
:
11944 return legitimate_pic_address_disp_p (x
);
11952 return legitimate_pic_address_disp_p (x
);
11959 /* Determine if a given CONST RTX is a valid memory displacement
11963 legitimate_pic_address_disp_p (rtx disp
)
11967 /* In 64bit mode we can allow direct addresses of symbols and labels
11968 when they are not dynamic symbols. */
11971 rtx op0
= disp
, op1
;
11973 switch (GET_CODE (disp
))
11979 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
11981 op0
= XEXP (XEXP (disp
, 0), 0);
11982 op1
= XEXP (XEXP (disp
, 0), 1);
11983 if (!CONST_INT_P (op1
)
11984 || INTVAL (op1
) >= 16*1024*1024
11985 || INTVAL (op1
) < -16*1024*1024)
11987 if (GET_CODE (op0
) == LABEL_REF
)
11989 if (GET_CODE (op0
) == CONST
11990 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
11991 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
11993 if (GET_CODE (op0
) == UNSPEC
11994 && XINT (op0
, 1) == UNSPEC_PCREL
)
11996 if (GET_CODE (op0
) != SYMBOL_REF
)
12001 /* TLS references should always be enclosed in UNSPEC. */
12002 if (SYMBOL_REF_TLS_MODEL (op0
))
12004 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12005 && ix86_cmodel
!= CM_LARGE_PIC
)
12013 if (GET_CODE (disp
) != CONST
)
12015 disp
= XEXP (disp
, 0);
12019 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12020 of GOT tables. We should not need these anyway. */
12021 if (GET_CODE (disp
) != UNSPEC
12022 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12023 && XINT (disp
, 1) != UNSPEC_GOTOFF
12024 && XINT (disp
, 1) != UNSPEC_PCREL
12025 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12028 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12029 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12035 if (GET_CODE (disp
) == PLUS
)
12037 if (!CONST_INT_P (XEXP (disp
, 1)))
12039 disp
= XEXP (disp
, 0);
12043 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12046 if (GET_CODE (disp
) != UNSPEC
)
12049 switch (XINT (disp
, 1))
12054 /* We need to check for both symbols and labels because VxWorks loads
12055 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12057 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12058 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12059 case UNSPEC_GOTOFF
:
12060 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12061 While ABI specify also 32bit relocation but we don't produce it in
12062 small PIC model at all. */
12063 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12064 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12066 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12068 case UNSPEC_GOTTPOFF
:
12069 case UNSPEC_GOTNTPOFF
:
12070 case UNSPEC_INDNTPOFF
:
12073 disp
= XVECEXP (disp
, 0, 0);
12074 return (GET_CODE (disp
) == SYMBOL_REF
12075 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12076 case UNSPEC_NTPOFF
:
12077 disp
= XVECEXP (disp
, 0, 0);
12078 return (GET_CODE (disp
) == SYMBOL_REF
12079 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12080 case UNSPEC_DTPOFF
:
12081 disp
= XVECEXP (disp
, 0, 0);
12082 return (GET_CODE (disp
) == SYMBOL_REF
12083 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12089 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12090 replace the input X, or the original X if no replacement is called for.
12091 The output parameter *WIN is 1 if the calling macro should goto WIN,
12092 0 if it should not. */
12095 ix86_legitimize_reload_address (rtx x
,
12096 enum machine_mode mode ATTRIBUTE_UNUSED
,
12097 int opnum
, int type
,
12098 int ind_levels ATTRIBUTE_UNUSED
)
12100 /* Reload can generate:
12102 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12106 This RTX is rejected from ix86_legitimate_address_p due to
12107 non-strictness of base register 97. Following this rejection,
12108 reload pushes all three components into separate registers,
12109 creating invalid memory address RTX.
12111 Following code reloads only the invalid part of the
12112 memory address RTX. */
12114 if (GET_CODE (x
) == PLUS
12115 && REG_P (XEXP (x
, 1))
12116 && GET_CODE (XEXP (x
, 0)) == PLUS
12117 && REG_P (XEXP (XEXP (x
, 0), 1)))
12120 bool something_reloaded
= false;
12122 base
= XEXP (XEXP (x
, 0), 1);
12123 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12125 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12126 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12127 opnum
, (enum reload_type
) type
);
12128 something_reloaded
= true;
12131 index
= XEXP (x
, 1);
12132 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12134 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12135 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12136 opnum
, (enum reload_type
) type
);
12137 something_reloaded
= true;
12140 gcc_assert (something_reloaded
);
12147 /* Recognizes RTL expressions that are valid memory addresses for an
12148 instruction. The MODE argument is the machine mode for the MEM
12149 expression that wants to use this address.
12151 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12152 convert common non-canonical forms to canonical form so that they will
12156 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12157 rtx addr
, bool strict
)
12159 struct ix86_address parts
;
12160 rtx base
, index
, disp
;
12161 HOST_WIDE_INT scale
;
12163 /* Since constant address in x32 is signed extended to 64bit,
12164 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12166 && CONST_INT_P (addr
)
12167 && INTVAL (addr
) < 0)
12170 if (ix86_decompose_address (addr
, &parts
) <= 0)
12171 /* Decomposition failed. */
12175 index
= parts
.index
;
12177 scale
= parts
.scale
;
12179 /* Validate base register. */
12186 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12187 reg
= SUBREG_REG (base
);
12189 /* Base is not a register. */
12192 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12195 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12196 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12197 /* Base is not valid. */
12201 /* Validate index register. */
12208 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12209 reg
= SUBREG_REG (index
);
12211 /* Index is not a register. */
12214 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12217 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12218 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12219 /* Index is not valid. */
12223 /* Index and base should have the same mode. */
12225 && GET_MODE (base
) != GET_MODE (index
))
12228 /* Validate scale factor. */
12232 /* Scale without index. */
12235 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12236 /* Scale is not a valid multiplier. */
12240 /* Validate displacement. */
12243 if (GET_CODE (disp
) == CONST
12244 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12245 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12246 switch (XINT (XEXP (disp
, 0), 1))
12248 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12249 used. While ABI specify also 32bit relocations, we don't produce
12250 them at all and use IP relative instead. */
12252 case UNSPEC_GOTOFF
:
12253 gcc_assert (flag_pic
);
12255 goto is_legitimate_pic
;
12257 /* 64bit address unspec. */
12260 case UNSPEC_GOTPCREL
:
12262 gcc_assert (flag_pic
);
12263 goto is_legitimate_pic
;
12265 case UNSPEC_GOTTPOFF
:
12266 case UNSPEC_GOTNTPOFF
:
12267 case UNSPEC_INDNTPOFF
:
12268 case UNSPEC_NTPOFF
:
12269 case UNSPEC_DTPOFF
:
12272 case UNSPEC_STACK_CHECK
:
12273 gcc_assert (flag_split_stack
);
12277 /* Invalid address unspec. */
12281 else if (SYMBOLIC_CONST (disp
)
12285 && MACHOPIC_INDIRECT
12286 && !machopic_operand_p (disp
)
12292 if (TARGET_64BIT
&& (index
|| base
))
12294 /* foo@dtpoff(%rX) is ok. */
12295 if (GET_CODE (disp
) != CONST
12296 || GET_CODE (XEXP (disp
, 0)) != PLUS
12297 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12298 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12299 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12300 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12301 /* Non-constant pic memory reference. */
12304 else if ((!TARGET_MACHO
|| flag_pic
)
12305 && ! legitimate_pic_address_disp_p (disp
))
12306 /* Displacement is an invalid pic construct. */
12309 else if (MACHO_DYNAMIC_NO_PIC_P
12310 && !ix86_legitimate_constant_p (Pmode
, disp
))
12311 /* displacment must be referenced via non_lazy_pointer */
12315 /* This code used to verify that a symbolic pic displacement
12316 includes the pic_offset_table_rtx register.
12318 While this is good idea, unfortunately these constructs may
12319 be created by "adds using lea" optimization for incorrect
12328 This code is nonsensical, but results in addressing
12329 GOT table with pic_offset_table_rtx base. We can't
12330 just refuse it easily, since it gets matched by
12331 "addsi3" pattern, that later gets split to lea in the
12332 case output register differs from input. While this
12333 can be handled by separate addsi pattern for this case
12334 that never results in lea, this seems to be easier and
12335 correct fix for crash to disable this test. */
12337 else if (GET_CODE (disp
) != LABEL_REF
12338 && !CONST_INT_P (disp
)
12339 && (GET_CODE (disp
) != CONST
12340 || !ix86_legitimate_constant_p (Pmode
, disp
))
12341 && (GET_CODE (disp
) != SYMBOL_REF
12342 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12343 /* Displacement is not constant. */
12345 else if (TARGET_64BIT
12346 && !x86_64_immediate_operand (disp
, VOIDmode
))
12347 /* Displacement is out of range. */
12351 /* Everything looks valid. */
12355 /* Determine if a given RTX is a valid constant address. */
12358 constant_address_p (rtx x
)
12360 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12363 /* Return a unique alias set for the GOT. */
12365 static alias_set_type
12366 ix86_GOT_alias_set (void)
12368 static alias_set_type set
= -1;
12370 set
= new_alias_set ();
12374 /* Return a legitimate reference for ORIG (an address) using the
12375 register REG. If REG is 0, a new pseudo is generated.
12377 There are two types of references that must be handled:
12379 1. Global data references must load the address from the GOT, via
12380 the PIC reg. An insn is emitted to do this load, and the reg is
12383 2. Static data references, constant pool addresses, and code labels
12384 compute the address as an offset from the GOT, whose base is in
12385 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12386 differentiate them from global data objects. The returned
12387 address is the PIC reg + an unspec constant.
12389 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12390 reg also appears in the address. */
12393 legitimize_pic_address (rtx orig
, rtx reg
)
12396 rtx new_rtx
= orig
;
12400 if (TARGET_MACHO
&& !TARGET_64BIT
)
12403 reg
= gen_reg_rtx (Pmode
);
12404 /* Use the generic Mach-O PIC machinery. */
12405 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12409 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12411 else if (TARGET_64BIT
12412 && ix86_cmodel
!= CM_SMALL_PIC
12413 && gotoff_operand (addr
, Pmode
))
12416 /* This symbol may be referenced via a displacement from the PIC
12417 base address (@GOTOFF). */
12419 if (reload_in_progress
)
12420 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12421 if (GET_CODE (addr
) == CONST
)
12422 addr
= XEXP (addr
, 0);
12423 if (GET_CODE (addr
) == PLUS
)
12425 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12427 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12430 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12431 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12433 tmpreg
= gen_reg_rtx (Pmode
);
12436 emit_move_insn (tmpreg
, new_rtx
);
12440 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12441 tmpreg
, 1, OPTAB_DIRECT
);
12444 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12446 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12448 /* This symbol may be referenced via a displacement from the PIC
12449 base address (@GOTOFF). */
12451 if (reload_in_progress
)
12452 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12453 if (GET_CODE (addr
) == CONST
)
12454 addr
= XEXP (addr
, 0);
12455 if (GET_CODE (addr
) == PLUS
)
12457 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12459 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12462 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12463 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12464 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12468 emit_move_insn (reg
, new_rtx
);
12472 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12473 /* We can't use @GOTOFF for text labels on VxWorks;
12474 see gotoff_operand. */
12475 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12477 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12479 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12480 return legitimize_dllimport_symbol (addr
, true);
12481 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12482 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12483 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12485 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12486 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12490 /* For x64 PE-COFF there is no GOT table. So we use address
12492 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12494 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12495 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12498 reg
= gen_reg_rtx (Pmode
);
12499 emit_move_insn (reg
, new_rtx
);
12502 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12504 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12505 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12506 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12507 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12510 reg
= gen_reg_rtx (Pmode
);
12511 /* Use directly gen_movsi, otherwise the address is loaded
12512 into register for CSE. We don't want to CSE this addresses,
12513 instead we CSE addresses from the GOT table, so skip this. */
12514 emit_insn (gen_movsi (reg
, new_rtx
));
12519 /* This symbol must be referenced via a load from the
12520 Global Offset Table (@GOT). */
12522 if (reload_in_progress
)
12523 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12524 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12525 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12527 new_rtx
= force_reg (Pmode
, new_rtx
);
12528 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12529 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12530 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12533 reg
= gen_reg_rtx (Pmode
);
12534 emit_move_insn (reg
, new_rtx
);
12540 if (CONST_INT_P (addr
)
12541 && !x86_64_immediate_operand (addr
, VOIDmode
))
12545 emit_move_insn (reg
, addr
);
12549 new_rtx
= force_reg (Pmode
, addr
);
12551 else if (GET_CODE (addr
) == CONST
)
12553 addr
= XEXP (addr
, 0);
12555 /* We must match stuff we generate before. Assume the only
12556 unspecs that can get here are ours. Not that we could do
12557 anything with them anyway.... */
12558 if (GET_CODE (addr
) == UNSPEC
12559 || (GET_CODE (addr
) == PLUS
12560 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12562 gcc_assert (GET_CODE (addr
) == PLUS
);
12564 if (GET_CODE (addr
) == PLUS
)
12566 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12568 /* Check first to see if this is a constant offset from a @GOTOFF
12569 symbol reference. */
12570 if (gotoff_operand (op0
, Pmode
)
12571 && CONST_INT_P (op1
))
12575 if (reload_in_progress
)
12576 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12577 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12579 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12580 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12581 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12585 emit_move_insn (reg
, new_rtx
);
12591 if (INTVAL (op1
) < -16*1024*1024
12592 || INTVAL (op1
) >= 16*1024*1024)
12594 if (!x86_64_immediate_operand (op1
, Pmode
))
12595 op1
= force_reg (Pmode
, op1
);
12596 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12602 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12603 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12604 base
== reg
? NULL_RTX
: reg
);
12606 if (CONST_INT_P (new_rtx
))
12607 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
12610 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12612 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12613 new_rtx
= XEXP (new_rtx
, 1);
12615 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
12623 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12626 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12628 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12630 if (GET_MODE (tp
) != tp_mode
)
12632 gcc_assert (GET_MODE (tp
) == SImode
);
12633 gcc_assert (tp_mode
== DImode
);
12635 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12639 tp
= copy_to_mode_reg (tp_mode
, tp
);
12644 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12646 static GTY(()) rtx ix86_tls_symbol
;
12649 ix86_tls_get_addr (void)
12651 if (!ix86_tls_symbol
)
12654 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12655 ? "___tls_get_addr" : "__tls_get_addr");
12657 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12660 return ix86_tls_symbol
;
12663 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12665 static GTY(()) rtx ix86_tls_module_base_symbol
;
12668 ix86_tls_module_base (void)
12670 if (!ix86_tls_module_base_symbol
)
12672 ix86_tls_module_base_symbol
12673 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12675 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12676 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12679 return ix86_tls_module_base_symbol
;
12682 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12683 false if we expect this to be used for a memory address and true if
12684 we expect to load the address into a register. */
12687 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12689 rtx dest
, base
, off
;
12690 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12691 enum machine_mode tp_mode
= Pmode
;
12696 case TLS_MODEL_GLOBAL_DYNAMIC
:
12697 dest
= gen_reg_rtx (Pmode
);
12702 pic
= pic_offset_table_rtx
;
12705 pic
= gen_reg_rtx (Pmode
);
12706 emit_insn (gen_set_got (pic
));
12710 if (TARGET_GNU2_TLS
)
12713 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12715 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12717 tp
= get_thread_pointer (Pmode
, true);
12718 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12720 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12724 rtx caddr
= ix86_tls_get_addr ();
12728 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12731 emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax
, x
,
12733 insns
= get_insns ();
12736 RTL_CONST_CALL_P (insns
) = 1;
12737 emit_libcall_block (insns
, dest
, rax
, x
);
12740 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12744 case TLS_MODEL_LOCAL_DYNAMIC
:
12745 base
= gen_reg_rtx (Pmode
);
12750 pic
= pic_offset_table_rtx
;
12753 pic
= gen_reg_rtx (Pmode
);
12754 emit_insn (gen_set_got (pic
));
12758 if (TARGET_GNU2_TLS
)
12760 rtx tmp
= ix86_tls_module_base ();
12763 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12765 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12767 tp
= get_thread_pointer (Pmode
, true);
12768 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12769 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12773 rtx caddr
= ix86_tls_get_addr ();
12777 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
12780 emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax
,
12782 insns
= get_insns ();
12785 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12786 share the LD_BASE result with other LD model accesses. */
12787 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12788 UNSPEC_TLS_LD_BASE
);
12790 RTL_CONST_CALL_P (insns
) = 1;
12791 emit_libcall_block (insns
, base
, rax
, eqv
);
12794 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12797 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12798 off
= gen_rtx_CONST (Pmode
, off
);
12800 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12802 if (TARGET_GNU2_TLS
)
12804 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12806 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12810 case TLS_MODEL_INITIAL_EXEC
:
12813 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12815 /* The Sun linker took the AMD64 TLS spec literally
12816 and can only handle %rax as destination of the
12817 initial executable code sequence. */
12819 dest
= gen_reg_rtx (DImode
);
12820 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12824 /* Generate DImode references to avoid %fs:(%reg32)
12825 problems and linker IE->LE relaxation bug. */
12828 type
= UNSPEC_GOTNTPOFF
;
12832 if (reload_in_progress
)
12833 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12834 pic
= pic_offset_table_rtx
;
12835 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12837 else if (!TARGET_ANY_GNU_TLS
)
12839 pic
= gen_reg_rtx (Pmode
);
12840 emit_insn (gen_set_got (pic
));
12841 type
= UNSPEC_GOTTPOFF
;
12846 type
= UNSPEC_INDNTPOFF
;
12849 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
12850 off
= gen_rtx_CONST (tp_mode
, off
);
12852 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
12853 off
= gen_const_mem (tp_mode
, off
);
12854 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12856 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12858 base
= get_thread_pointer (tp_mode
,
12859 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12860 off
= force_reg (tp_mode
, off
);
12861 return gen_rtx_PLUS (tp_mode
, base
, off
);
12865 base
= get_thread_pointer (Pmode
, true);
12866 dest
= gen_reg_rtx (Pmode
);
12867 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12871 case TLS_MODEL_LOCAL_EXEC
:
12872 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12873 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12874 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12875 off
= gen_rtx_CONST (Pmode
, off
);
12877 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12879 base
= get_thread_pointer (Pmode
,
12880 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12881 return gen_rtx_PLUS (Pmode
, base
, off
);
12885 base
= get_thread_pointer (Pmode
, true);
12886 dest
= gen_reg_rtx (Pmode
);
12887 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12892 gcc_unreachable ();
12898 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12901 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
12902 htab_t dllimport_map
;
12905 get_dllimport_decl (tree decl
)
12907 struct tree_map
*h
, in
;
12910 const char *prefix
;
12911 size_t namelen
, prefixlen
;
12916 if (!dllimport_map
)
12917 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
12919 in
.hash
= htab_hash_pointer (decl
);
12920 in
.base
.from
= decl
;
12921 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
12922 h
= (struct tree_map
*) *loc
;
12926 *loc
= h
= ggc_alloc_tree_map ();
12928 h
->base
.from
= decl
;
12929 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
12930 VAR_DECL
, NULL
, ptr_type_node
);
12931 DECL_ARTIFICIAL (to
) = 1;
12932 DECL_IGNORED_P (to
) = 1;
12933 DECL_EXTERNAL (to
) = 1;
12934 TREE_READONLY (to
) = 1;
12936 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
12937 name
= targetm
.strip_name_encoding (name
);
12938 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
12939 ? "*__imp_" : "*__imp__";
12940 namelen
= strlen (name
);
12941 prefixlen
= strlen (prefix
);
12942 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
12943 memcpy (imp_name
, prefix
, prefixlen
);
12944 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
12946 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
12947 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
12948 SET_SYMBOL_REF_DECL (rtl
, to
);
12949 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
12951 rtl
= gen_const_mem (Pmode
, rtl
);
12952 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
12954 SET_DECL_RTL (to
, rtl
);
12955 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
12960 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12961 true if we require the result be a register. */
12964 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
12969 gcc_assert (SYMBOL_REF_DECL (symbol
));
12970 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
12972 x
= DECL_RTL (imp_decl
);
12974 x
= force_reg (Pmode
, x
);
12978 /* Try machine-dependent ways of modifying an illegitimate address
12979 to be legitimate. If we find one, return the new, valid address.
12980 This macro is used in only one place: `memory_address' in explow.c.
12982 OLDX is the address as it was before break_out_memory_refs was called.
12983 In some cases it is useful to look at this to decide what needs to be done.
12985 It is always safe for this macro to do nothing. It exists to recognize
12986 opportunities to optimize the output.
12988 For the 80386, we handle X+REG by loading X into a register R and
12989 using R+REG. R will go in a general reg and indexing will be used.
12990 However, if REG is a broken-out memory address or multiplication,
12991 nothing needs to be done because REG can certainly go in a general reg.
12993 When -fpic is used, special handling is needed for symbolic references.
12994 See comments by legitimize_pic_address in i386.c for details. */
12997 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
12998 enum machine_mode mode
)
13003 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13005 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13006 if (GET_CODE (x
) == CONST
13007 && GET_CODE (XEXP (x
, 0)) == PLUS
13008 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13009 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13011 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13012 (enum tls_model
) log
, false);
13013 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13016 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13018 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13019 return legitimize_dllimport_symbol (x
, true);
13020 if (GET_CODE (x
) == CONST
13021 && GET_CODE (XEXP (x
, 0)) == PLUS
13022 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13023 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13025 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13026 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13030 if (flag_pic
&& SYMBOLIC_CONST (x
))
13031 return legitimize_pic_address (x
, 0);
13034 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13035 return machopic_indirect_data_reference (x
, 0);
13038 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13039 if (GET_CODE (x
) == ASHIFT
13040 && CONST_INT_P (XEXP (x
, 1))
13041 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13044 log
= INTVAL (XEXP (x
, 1));
13045 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13046 GEN_INT (1 << log
));
13049 if (GET_CODE (x
) == PLUS
)
13051 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13053 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13054 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13055 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13058 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13059 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13060 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13061 GEN_INT (1 << log
));
13064 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13065 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13066 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13069 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13070 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13071 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13072 GEN_INT (1 << log
));
13075 /* Put multiply first if it isn't already. */
13076 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13078 rtx tmp
= XEXP (x
, 0);
13079 XEXP (x
, 0) = XEXP (x
, 1);
13084 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13085 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13086 created by virtual register instantiation, register elimination, and
13087 similar optimizations. */
13088 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13091 x
= gen_rtx_PLUS (Pmode
,
13092 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13093 XEXP (XEXP (x
, 1), 0)),
13094 XEXP (XEXP (x
, 1), 1));
13098 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13099 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13100 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13101 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13102 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13103 && CONSTANT_P (XEXP (x
, 1)))
13106 rtx other
= NULL_RTX
;
13108 if (CONST_INT_P (XEXP (x
, 1)))
13110 constant
= XEXP (x
, 1);
13111 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13113 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13115 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13116 other
= XEXP (x
, 1);
13124 x
= gen_rtx_PLUS (Pmode
,
13125 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13126 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13127 plus_constant (Pmode
, other
,
13128 INTVAL (constant
)));
13132 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13135 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13138 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13141 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13144 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13148 && REG_P (XEXP (x
, 1))
13149 && REG_P (XEXP (x
, 0)))
13152 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13155 x
= legitimize_pic_address (x
, 0);
13158 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13161 if (REG_P (XEXP (x
, 0)))
13163 rtx temp
= gen_reg_rtx (Pmode
);
13164 rtx val
= force_operand (XEXP (x
, 1), temp
);
13167 if (GET_MODE (val
) != Pmode
)
13168 val
= convert_to_mode (Pmode
, val
, 1);
13169 emit_move_insn (temp
, val
);
13172 XEXP (x
, 1) = temp
;
13176 else if (REG_P (XEXP (x
, 1)))
13178 rtx temp
= gen_reg_rtx (Pmode
);
13179 rtx val
= force_operand (XEXP (x
, 0), temp
);
13182 if (GET_MODE (val
) != Pmode
)
13183 val
= convert_to_mode (Pmode
, val
, 1);
13184 emit_move_insn (temp
, val
);
13187 XEXP (x
, 0) = temp
;
13195 /* Print an integer constant expression in assembler syntax. Addition
13196 and subtraction are the only arithmetic that may appear in these
13197 expressions. FILE is the stdio stream to write to, X is the rtx, and
13198 CODE is the operand print code from the output string. */
13201 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13205 switch (GET_CODE (x
))
13208 gcc_assert (flag_pic
);
13213 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13214 output_addr_const (file
, x
);
13217 const char *name
= XSTR (x
, 0);
13219 /* Mark the decl as referenced so that cgraph will
13220 output the function. */
13221 if (SYMBOL_REF_DECL (x
))
13222 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13225 if (MACHOPIC_INDIRECT
13226 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13227 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13229 assemble_name (file
, name
);
13231 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13232 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13233 fputs ("@PLT", file
);
13240 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13241 assemble_name (asm_out_file
, buf
);
13245 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13249 /* This used to output parentheses around the expression,
13250 but that does not work on the 386 (either ATT or BSD assembler). */
13251 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13255 if (GET_MODE (x
) == VOIDmode
)
13257 /* We can use %d if the number is <32 bits and positive. */
13258 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13259 fprintf (file
, "0x%lx%08lx",
13260 (unsigned long) CONST_DOUBLE_HIGH (x
),
13261 (unsigned long) CONST_DOUBLE_LOW (x
));
13263 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13266 /* We can't handle floating point constants;
13267 TARGET_PRINT_OPERAND must handle them. */
13268 output_operand_lossage ("floating constant misused");
13272 /* Some assemblers need integer constants to appear first. */
13273 if (CONST_INT_P (XEXP (x
, 0)))
13275 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13277 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13281 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13282 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13284 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13290 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13291 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13293 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13295 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13299 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13301 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13306 gcc_assert (XVECLEN (x
, 0) == 1);
13307 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13308 switch (XINT (x
, 1))
13311 fputs ("@GOT", file
);
13313 case UNSPEC_GOTOFF
:
13314 fputs ("@GOTOFF", file
);
13316 case UNSPEC_PLTOFF
:
13317 fputs ("@PLTOFF", file
);
13320 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13321 "(%rip)" : "[rip]", file
);
13323 case UNSPEC_GOTPCREL
:
13324 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13325 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13327 case UNSPEC_GOTTPOFF
:
13328 /* FIXME: This might be @TPOFF in Sun ld too. */
13329 fputs ("@gottpoff", file
);
13332 fputs ("@tpoff", file
);
13334 case UNSPEC_NTPOFF
:
13336 fputs ("@tpoff", file
);
13338 fputs ("@ntpoff", file
);
13340 case UNSPEC_DTPOFF
:
13341 fputs ("@dtpoff", file
);
13343 case UNSPEC_GOTNTPOFF
:
13345 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13346 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13348 fputs ("@gotntpoff", file
);
13350 case UNSPEC_INDNTPOFF
:
13351 fputs ("@indntpoff", file
);
13354 case UNSPEC_MACHOPIC_OFFSET
:
13356 machopic_output_function_base_name (file
);
13360 output_operand_lossage ("invalid UNSPEC as operand");
13366 output_operand_lossage ("invalid expression as operand");
13370 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13371 We need to emit DTP-relative relocations. */
13373 static void ATTRIBUTE_UNUSED
13374 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13376 fputs (ASM_LONG
, file
);
13377 output_addr_const (file
, x
);
13378 fputs ("@dtpoff", file
);
13384 fputs (", 0", file
);
13387 gcc_unreachable ();
13391 /* Return true if X is a representation of the PIC register. This copes
13392 with calls from ix86_find_base_term, where the register might have
13393 been replaced by a cselib value. */
13396 ix86_pic_register_p (rtx x
)
13398 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13399 return (pic_offset_table_rtx
13400 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13402 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13405 /* Helper function for ix86_delegitimize_address.
13406 Attempt to delegitimize TLS local-exec accesses. */
13409 ix86_delegitimize_tls_address (rtx orig_x
)
13411 rtx x
= orig_x
, unspec
;
13412 struct ix86_address addr
;
13414 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13418 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13420 if (ix86_decompose_address (x
, &addr
) == 0
13421 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13422 || addr
.disp
== NULL_RTX
13423 || GET_CODE (addr
.disp
) != CONST
)
13425 unspec
= XEXP (addr
.disp
, 0);
13426 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13427 unspec
= XEXP (unspec
, 0);
13428 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13430 x
= XVECEXP (unspec
, 0, 0);
13431 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13432 if (unspec
!= XEXP (addr
.disp
, 0))
13433 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13436 rtx idx
= addr
.index
;
13437 if (addr
.scale
!= 1)
13438 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13439 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13442 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13443 if (MEM_P (orig_x
))
13444 x
= replace_equiv_address_nv (orig_x
, x
);
13448 /* In the name of slightly smaller debug output, and to cater to
13449 general assembler lossage, recognize PIC+GOTOFF and turn it back
13450 into a direct symbol reference.
13452 On Darwin, this is necessary to avoid a crash, because Darwin
13453 has a different PIC label for each routine but the DWARF debugging
13454 information is not associated with any particular routine, so it's
13455 necessary to remove references to the PIC label from RTL stored by
13456 the DWARF output code. */
13459 ix86_delegitimize_address (rtx x
)
13461 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13462 /* addend is NULL or some rtx if x is something+GOTOFF where
13463 something doesn't include the PIC register. */
13464 rtx addend
= NULL_RTX
;
13465 /* reg_addend is NULL or a multiple of some register. */
13466 rtx reg_addend
= NULL_RTX
;
13467 /* const_addend is NULL or a const_int. */
13468 rtx const_addend
= NULL_RTX
;
13469 /* This is the result, or NULL. */
13470 rtx result
= NULL_RTX
;
13479 if (GET_CODE (x
) == CONST
13480 && GET_CODE (XEXP (x
, 0)) == PLUS
13481 && GET_MODE (XEXP (x
, 0)) == Pmode
13482 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13483 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13484 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13486 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13487 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13488 if (MEM_P (orig_x
))
13489 x
= replace_equiv_address_nv (orig_x
, x
);
13492 if (GET_CODE (x
) != CONST
13493 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13494 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13495 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13496 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13497 return ix86_delegitimize_tls_address (orig_x
);
13498 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13499 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13501 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13509 if (GET_CODE (x
) != PLUS
13510 || GET_CODE (XEXP (x
, 1)) != CONST
)
13511 return ix86_delegitimize_tls_address (orig_x
);
13513 if (ix86_pic_register_p (XEXP (x
, 0)))
13514 /* %ebx + GOT/GOTOFF */
13516 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13518 /* %ebx + %reg * scale + GOT/GOTOFF */
13519 reg_addend
= XEXP (x
, 0);
13520 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13521 reg_addend
= XEXP (reg_addend
, 1);
13522 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13523 reg_addend
= XEXP (reg_addend
, 0);
13526 reg_addend
= NULL_RTX
;
13527 addend
= XEXP (x
, 0);
13531 addend
= XEXP (x
, 0);
13533 x
= XEXP (XEXP (x
, 1), 0);
13534 if (GET_CODE (x
) == PLUS
13535 && CONST_INT_P (XEXP (x
, 1)))
13537 const_addend
= XEXP (x
, 1);
13541 if (GET_CODE (x
) == UNSPEC
13542 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13543 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13544 result
= XVECEXP (x
, 0, 0);
13546 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13547 && !MEM_P (orig_x
))
13548 result
= XVECEXP (x
, 0, 0);
13551 return ix86_delegitimize_tls_address (orig_x
);
13554 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13556 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13559 /* If the rest of original X doesn't involve the PIC register, add
13560 addend and subtract pic_offset_table_rtx. This can happen e.g.
13562 leal (%ebx, %ecx, 4), %ecx
13564 movl foo@GOTOFF(%ecx), %edx
13565 in which case we return (%ecx - %ebx) + foo. */
13566 if (pic_offset_table_rtx
)
13567 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13568 pic_offset_table_rtx
),
13573 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13575 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13576 if (result
== NULL_RTX
)
13582 /* If X is a machine specific address (i.e. a symbol or label being
13583 referenced as a displacement from the GOT implemented using an
13584 UNSPEC), then return the base term. Otherwise return X. */
13587 ix86_find_base_term (rtx x
)
13593 if (GET_CODE (x
) != CONST
)
13595 term
= XEXP (x
, 0);
13596 if (GET_CODE (term
) == PLUS
13597 && (CONST_INT_P (XEXP (term
, 1))
13598 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13599 term
= XEXP (term
, 0);
13600 if (GET_CODE (term
) != UNSPEC
13601 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13602 && XINT (term
, 1) != UNSPEC_PCREL
))
13605 return XVECEXP (term
, 0, 0);
13608 return ix86_delegitimize_address (x
);
13612 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13613 bool fp
, FILE *file
)
13615 const char *suffix
;
13617 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13619 code
= ix86_fp_compare_code_to_integer (code
);
13623 code
= reverse_condition (code
);
13674 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13678 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13679 Those same assemblers have the same but opposite lossage on cmov. */
13680 if (mode
== CCmode
)
13681 suffix
= fp
? "nbe" : "a";
13682 else if (mode
== CCCmode
)
13685 gcc_unreachable ();
13701 gcc_unreachable ();
13705 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13722 gcc_unreachable ();
13726 /* ??? As above. */
13727 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13728 suffix
= fp
? "nb" : "ae";
13731 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13735 /* ??? As above. */
13736 if (mode
== CCmode
)
13738 else if (mode
== CCCmode
)
13739 suffix
= fp
? "nb" : "ae";
13741 gcc_unreachable ();
13744 suffix
= fp
? "u" : "p";
13747 suffix
= fp
? "nu" : "np";
13750 gcc_unreachable ();
13752 fputs (suffix
, file
);
13755 /* Print the name of register X to FILE based on its machine mode and number.
13756 If CODE is 'w', pretend the mode is HImode.
13757 If CODE is 'b', pretend the mode is QImode.
13758 If CODE is 'k', pretend the mode is SImode.
13759 If CODE is 'q', pretend the mode is DImode.
13760 If CODE is 'x', pretend the mode is V4SFmode.
13761 If CODE is 't', pretend the mode is V8SFmode.
13762 If CODE is 'h', pretend the reg is the 'high' byte register.
13763 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13764 If CODE is 'd', duplicate the operand for AVX instruction.
13768 print_reg (rtx x
, int code
, FILE *file
)
13771 bool duplicated
= code
== 'd' && TARGET_AVX
;
13773 gcc_assert (x
== pc_rtx
13774 || (REGNO (x
) != ARG_POINTER_REGNUM
13775 && REGNO (x
) != FRAME_POINTER_REGNUM
13776 && REGNO (x
) != FLAGS_REG
13777 && REGNO (x
) != FPSR_REG
13778 && REGNO (x
) != FPCR_REG
));
13780 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13785 gcc_assert (TARGET_64BIT
);
13786 fputs ("rip", file
);
13790 if (code
== 'w' || MMX_REG_P (x
))
13792 else if (code
== 'b')
13794 else if (code
== 'k')
13796 else if (code
== 'q')
13798 else if (code
== 'y')
13800 else if (code
== 'h')
13802 else if (code
== 'x')
13804 else if (code
== 't')
13807 code
= GET_MODE_SIZE (GET_MODE (x
));
13809 /* Irritatingly, AMD extended registers use different naming convention
13810 from the normal registers: "r%d[bwd]" */
13811 if (REX_INT_REG_P (x
))
13813 gcc_assert (TARGET_64BIT
);
13815 fprint_ul (file
, REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13819 error ("extended registers have no high halves");
13834 error ("unsupported operand size for extended register");
13844 if (STACK_TOP_P (x
))
13853 if (! ANY_FP_REG_P (x
))
13854 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13859 reg
= hi_reg_name
[REGNO (x
)];
13862 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
13864 reg
= qi_reg_name
[REGNO (x
)];
13867 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
13869 reg
= qi_high_reg_name
[REGNO (x
)];
13874 gcc_assert (!duplicated
);
13876 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
13881 gcc_unreachable ();
13887 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13888 fprintf (file
, ", %%%s", reg
);
13890 fprintf (file
, ", %s", reg
);
13894 /* Locate some local-dynamic symbol still in use by this function
13895 so that we can print its name in some tls_local_dynamic_base
13899 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
13903 if (GET_CODE (x
) == SYMBOL_REF
13904 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
13906 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
13913 static const char *
13914 get_some_local_dynamic_name (void)
13918 if (cfun
->machine
->some_ld_name
)
13919 return cfun
->machine
->some_ld_name
;
13921 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
13922 if (NONDEBUG_INSN_P (insn
)
13923 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
13924 return cfun
->machine
->some_ld_name
;
13929 /* Meaning of CODE:
13930 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13931 C -- print opcode suffix for set/cmov insn.
13932 c -- like C, but print reversed condition
13933 F,f -- likewise, but for floating-point.
13934 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13936 R -- print the prefix for register names.
13937 z -- print the opcode suffix for the size of the current operand.
13938 Z -- likewise, with special suffixes for x87 instructions.
13939 * -- print a star (in certain assembler syntax)
13940 A -- print an absolute memory reference.
13941 E -- print address with DImode register names if TARGET_64BIT.
13942 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13943 s -- print a shift double count, followed by the assemblers argument
13945 b -- print the QImode name of the register for the indicated operand.
13946 %b0 would print %al if operands[0] is reg 0.
13947 w -- likewise, print the HImode name of the register.
13948 k -- likewise, print the SImode name of the register.
13949 q -- likewise, print the DImode name of the register.
13950 x -- likewise, print the V4SFmode name of the register.
13951 t -- likewise, print the V8SFmode name of the register.
13952 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13953 y -- print "st(0)" instead of "st" as a register.
13954 d -- print duplicated register operand for AVX instruction.
13955 D -- print condition for SSE cmp instruction.
13956 P -- if PIC, print an @PLT suffix.
13957 p -- print raw symbol name.
13958 X -- don't print any sort of PIC '@' suffix for a symbol.
13959 & -- print some in-use local-dynamic symbol name.
13960 H -- print a memory address offset by 8; used for sse high-parts
13961 Y -- print condition for XOP pcom* instruction.
13962 + -- print a branch hint as 'cs' or 'ds' prefix
13963 ; -- print a semicolon (after prefixes due to bug in older gas).
13964 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13965 @ -- print a segment register of thread base pointer load
13966 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13970 ix86_print_operand (FILE *file
, rtx x
, int code
)
13977 switch (ASSEMBLER_DIALECT
)
13984 /* Intel syntax. For absolute addresses, registers should not
13985 be surrounded by braces. */
13989 ix86_print_operand (file
, x
, 0);
13996 gcc_unreachable ();
13999 ix86_print_operand (file
, x
, 0);
14003 /* Wrap address in an UNSPEC to declare special handling. */
14005 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14007 output_address (x
);
14011 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14016 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14021 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14026 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14031 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14036 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14041 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14042 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14045 switch (GET_MODE_SIZE (GET_MODE (x
)))
14060 output_operand_lossage
14061 ("invalid operand size for operand code 'O'");
14070 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14072 /* Opcodes don't get size suffixes if using Intel opcodes. */
14073 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14076 switch (GET_MODE_SIZE (GET_MODE (x
)))
14095 output_operand_lossage
14096 ("invalid operand size for operand code 'z'");
14101 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14103 (0, "non-integer operand used with operand code 'z'");
14107 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14108 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14111 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14113 switch (GET_MODE_SIZE (GET_MODE (x
)))
14116 #ifdef HAVE_AS_IX86_FILDS
14126 #ifdef HAVE_AS_IX86_FILDQ
14129 fputs ("ll", file
);
14137 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14139 /* 387 opcodes don't get size suffixes
14140 if the operands are registers. */
14141 if (STACK_REG_P (x
))
14144 switch (GET_MODE_SIZE (GET_MODE (x
)))
14165 output_operand_lossage
14166 ("invalid operand type used with operand code 'Z'");
14170 output_operand_lossage
14171 ("invalid operand size for operand code 'Z'");
14189 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14191 ix86_print_operand (file
, x
, 0);
14192 fputs (", ", file
);
14197 switch (GET_CODE (x
))
14200 fputs ("neq", file
);
14203 fputs ("eq", file
);
14207 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14211 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14215 fputs ("le", file
);
14219 fputs ("lt", file
);
14222 fputs ("unord", file
);
14225 fputs ("ord", file
);
14228 fputs ("ueq", file
);
14231 fputs ("nlt", file
);
14234 fputs ("nle", file
);
14237 fputs ("ule", file
);
14240 fputs ("ult", file
);
14243 fputs ("une", file
);
14246 output_operand_lossage ("operand is not a condition code, "
14247 "invalid operand code 'Y'");
14253 /* Little bit of braindamage here. The SSE compare instructions
14254 does use completely different names for the comparisons that the
14255 fp conditional moves. */
14256 switch (GET_CODE (x
))
14261 fputs ("eq_us", file
);
14265 fputs ("eq", file
);
14270 fputs ("nge", file
);
14274 fputs ("lt", file
);
14279 fputs ("ngt", file
);
14283 fputs ("le", file
);
14286 fputs ("unord", file
);
14291 fputs ("neq_oq", file
);
14295 fputs ("neq", file
);
14300 fputs ("ge", file
);
14304 fputs ("nlt", file
);
14309 fputs ("gt", file
);
14313 fputs ("nle", file
);
14316 fputs ("ord", file
);
14319 output_operand_lossage ("operand is not a condition code, "
14320 "invalid operand code 'D'");
14327 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14328 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14334 if (!COMPARISON_P (x
))
14336 output_operand_lossage ("operand is not a condition code, "
14337 "invalid operand code '%c'", code
);
14340 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14341 code
== 'c' || code
== 'f',
14342 code
== 'F' || code
== 'f',
14347 if (!offsettable_memref_p (x
))
14349 output_operand_lossage ("operand is not an offsettable memory "
14350 "reference, invalid operand code 'H'");
14353 /* It doesn't actually matter what mode we use here, as we're
14354 only going to use this for printing. */
14355 x
= adjust_address_nv (x
, DImode
, 8);
14359 gcc_assert (CONST_INT_P (x
));
14361 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14362 #ifdef HAVE_AS_IX86_HLE
14363 fputs ("xacquire ", file
);
14365 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14367 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14368 #ifdef HAVE_AS_IX86_HLE
14369 fputs ("xrelease ", file
);
14371 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14373 /* We do not want to print value of the operand. */
14377 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14383 const char *name
= get_some_local_dynamic_name ();
14385 output_operand_lossage ("'%%&' used without any "
14386 "local dynamic TLS references");
14388 assemble_name (file
, name
);
14397 || optimize_function_for_size_p (cfun
)
14398 || !TARGET_BRANCH_PREDICTION_HINTS
)
14401 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14404 int pred_val
= INTVAL (XEXP (x
, 0));
14406 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14407 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14409 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14411 = final_forward_branch_p (current_output_insn
) == 0;
14413 /* Emit hints only in the case default branch prediction
14414 heuristics would fail. */
14415 if (taken
!= cputaken
)
14417 /* We use 3e (DS) prefix for taken branches and
14418 2e (CS) prefix for not taken branches. */
14420 fputs ("ds ; ", file
);
14422 fputs ("cs ; ", file
);
14430 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14436 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14439 /* The kernel uses a different segment register for performance
14440 reasons; a system call would not have to trash the userspace
14441 segment register, which would be expensive. */
14442 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14443 fputs ("fs", file
);
14445 fputs ("gs", file
);
14449 putc (TARGET_AVX2
? 'i' : 'f', file
);
14453 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14454 fputs ("addr32 ", file
);
14458 output_operand_lossage ("invalid operand code '%c'", code
);
14463 print_reg (x
, code
, file
);
14465 else if (MEM_P (x
))
14467 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14468 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14469 && GET_MODE (x
) != BLKmode
)
14472 switch (GET_MODE_SIZE (GET_MODE (x
)))
14474 case 1: size
= "BYTE"; break;
14475 case 2: size
= "WORD"; break;
14476 case 4: size
= "DWORD"; break;
14477 case 8: size
= "QWORD"; break;
14478 case 12: size
= "TBYTE"; break;
14480 if (GET_MODE (x
) == XFmode
)
14485 case 32: size
= "YMMWORD"; break;
14487 gcc_unreachable ();
14490 /* Check for explicit size override (codes 'b', 'w', 'k',
14494 else if (code
== 'w')
14496 else if (code
== 'k')
14498 else if (code
== 'q')
14500 else if (code
== 'x')
14503 fputs (size
, file
);
14504 fputs (" PTR ", file
);
14508 /* Avoid (%rip) for call operands. */
14509 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14510 && !CONST_INT_P (x
))
14511 output_addr_const (file
, x
);
14512 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14513 output_operand_lossage ("invalid constraints for operand");
14515 output_address (x
);
14518 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14523 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14524 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14526 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14528 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14530 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14532 fprintf (file
, "0x%08x", (unsigned int) l
);
14535 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14540 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14541 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14543 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14545 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14548 /* These float cases don't actually occur as immediate operands. */
14549 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14553 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14554 fputs (dstr
, file
);
14559 /* We have patterns that allow zero sets of memory, for instance.
14560 In 64-bit mode, we should probably support all 8-byte vectors,
14561 since we can in fact encode that into an immediate. */
14562 if (GET_CODE (x
) == CONST_VECTOR
)
14564 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14568 if (code
!= 'P' && code
!= 'p')
14570 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14572 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14575 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14576 || GET_CODE (x
) == LABEL_REF
)
14578 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14581 fputs ("OFFSET FLAT:", file
);
14584 if (CONST_INT_P (x
))
14585 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14586 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14587 output_pic_addr_const (file
, x
, code
);
14589 output_addr_const (file
, x
);
14594 ix86_print_operand_punct_valid_p (unsigned char code
)
14596 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14597 || code
== ';' || code
== '~' || code
== '^');
14600 /* Print a memory operand whose address is ADDR. */
14603 ix86_print_operand_address (FILE *file
, rtx addr
)
14605 struct ix86_address parts
;
14606 rtx base
, index
, disp
;
14612 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14614 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14615 gcc_assert (parts
.index
== NULL_RTX
);
14616 parts
.index
= XVECEXP (addr
, 0, 1);
14617 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14618 addr
= XVECEXP (addr
, 0, 0);
14621 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14623 gcc_assert (TARGET_64BIT
);
14624 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14628 ok
= ix86_decompose_address (addr
, &parts
);
14632 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14634 rtx tmp
= SUBREG_REG (parts
.base
);
14635 parts
.base
= simplify_subreg (GET_MODE (parts
.base
),
14636 tmp
, GET_MODE (tmp
), 0);
14639 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14641 rtx tmp
= SUBREG_REG (parts
.index
);
14642 parts
.index
= simplify_subreg (GET_MODE (parts
.index
),
14643 tmp
, GET_MODE (tmp
), 0);
14647 index
= parts
.index
;
14649 scale
= parts
.scale
;
14657 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14659 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14662 gcc_unreachable ();
14665 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14666 if (TARGET_64BIT
&& !base
&& !index
)
14670 if (GET_CODE (disp
) == CONST
14671 && GET_CODE (XEXP (disp
, 0)) == PLUS
14672 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14673 symbol
= XEXP (XEXP (disp
, 0), 0);
14675 if (GET_CODE (symbol
) == LABEL_REF
14676 || (GET_CODE (symbol
) == SYMBOL_REF
14677 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14680 if (!base
&& !index
)
14682 /* Displacement only requires special attention. */
14684 if (CONST_INT_P (disp
))
14686 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14687 fputs ("ds:", file
);
14688 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14691 output_pic_addr_const (file
, disp
, 0);
14693 output_addr_const (file
, disp
);
14697 /* Print SImode register names for zero-extended
14698 addresses to force addr32 prefix. */
14700 && (GET_CODE (addr
) == ZERO_EXTEND
14701 || GET_CODE (addr
) == AND
))
14703 gcc_assert (!code
);
14707 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14712 output_pic_addr_const (file
, disp
, 0);
14713 else if (GET_CODE (disp
) == LABEL_REF
)
14714 output_asm_label (disp
);
14716 output_addr_const (file
, disp
);
14721 print_reg (base
, code
, file
);
14725 print_reg (index
, vsib
? 0 : code
, file
);
14726 if (scale
!= 1 || vsib
)
14727 fprintf (file
, ",%d", scale
);
14733 rtx offset
= NULL_RTX
;
14737 /* Pull out the offset of a symbol; print any symbol itself. */
14738 if (GET_CODE (disp
) == CONST
14739 && GET_CODE (XEXP (disp
, 0)) == PLUS
14740 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14742 offset
= XEXP (XEXP (disp
, 0), 1);
14743 disp
= gen_rtx_CONST (VOIDmode
,
14744 XEXP (XEXP (disp
, 0), 0));
14748 output_pic_addr_const (file
, disp
, 0);
14749 else if (GET_CODE (disp
) == LABEL_REF
)
14750 output_asm_label (disp
);
14751 else if (CONST_INT_P (disp
))
14754 output_addr_const (file
, disp
);
14760 print_reg (base
, code
, file
);
14763 if (INTVAL (offset
) >= 0)
14765 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14769 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14776 print_reg (index
, vsib
? 0 : code
, file
);
14777 if (scale
!= 1 || vsib
)
14778 fprintf (file
, "*%d", scale
);
14785 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14788 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14792 if (GET_CODE (x
) != UNSPEC
)
14795 op
= XVECEXP (x
, 0, 0);
14796 switch (XINT (x
, 1))
14798 case UNSPEC_GOTTPOFF
:
14799 output_addr_const (file
, op
);
14800 /* FIXME: This might be @TPOFF in Sun ld. */
14801 fputs ("@gottpoff", file
);
14804 output_addr_const (file
, op
);
14805 fputs ("@tpoff", file
);
14807 case UNSPEC_NTPOFF
:
14808 output_addr_const (file
, op
);
14810 fputs ("@tpoff", file
);
14812 fputs ("@ntpoff", file
);
14814 case UNSPEC_DTPOFF
:
14815 output_addr_const (file
, op
);
14816 fputs ("@dtpoff", file
);
14818 case UNSPEC_GOTNTPOFF
:
14819 output_addr_const (file
, op
);
14821 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14822 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14824 fputs ("@gotntpoff", file
);
14826 case UNSPEC_INDNTPOFF
:
14827 output_addr_const (file
, op
);
14828 fputs ("@indntpoff", file
);
14831 case UNSPEC_MACHOPIC_OFFSET
:
14832 output_addr_const (file
, op
);
14834 machopic_output_function_base_name (file
);
14838 case UNSPEC_STACK_CHECK
:
14842 gcc_assert (flag_split_stack
);
14844 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14845 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14847 gcc_unreachable ();
14850 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14861 /* Split one or more double-mode RTL references into pairs of half-mode
14862 references. The RTL can be REG, offsettable MEM, integer constant, or
14863 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14864 split and "num" is its length. lo_half and hi_half are output arrays
14865 that parallel "operands". */
14868 split_double_mode (enum machine_mode mode
, rtx operands
[],
14869 int num
, rtx lo_half
[], rtx hi_half
[])
14871 enum machine_mode half_mode
;
14877 half_mode
= DImode
;
14880 half_mode
= SImode
;
14883 gcc_unreachable ();
14886 byte
= GET_MODE_SIZE (half_mode
);
14890 rtx op
= operands
[num
];
14892 /* simplify_subreg refuse to split volatile memory addresses,
14893 but we still have to handle it. */
14896 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
14897 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
14901 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14902 GET_MODE (op
) == VOIDmode
14903 ? mode
: GET_MODE (op
), 0);
14904 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14905 GET_MODE (op
) == VOIDmode
14906 ? mode
: GET_MODE (op
), byte
);
14911 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14912 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14913 is the expression of the binary operation. The output may either be
14914 emitted here, or returned to the caller, like all output_* functions.
14916 There is no guarantee that the operands are the same mode, as they
14917 might be within FLOAT or FLOAT_EXTEND expressions. */
14919 #ifndef SYSV386_COMPAT
14920 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14921 wants to fix the assemblers because that causes incompatibility
14922 with gcc. No-one wants to fix gcc because that causes
14923 incompatibility with assemblers... You can use the option of
14924 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14925 #define SYSV386_COMPAT 1
14929 output_387_binary_op (rtx insn
, rtx
*operands
)
14931 static char buf
[40];
14934 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
14936 #ifdef ENABLE_CHECKING
14937 /* Even if we do not want to check the inputs, this documents input
14938 constraints. Which helps in understanding the following code. */
14939 if (STACK_REG_P (operands
[0])
14940 && ((REG_P (operands
[1])
14941 && REGNO (operands
[0]) == REGNO (operands
[1])
14942 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
14943 || (REG_P (operands
[2])
14944 && REGNO (operands
[0]) == REGNO (operands
[2])
14945 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
14946 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
14949 gcc_assert (is_sse
);
14952 switch (GET_CODE (operands
[3]))
14955 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14956 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14964 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14965 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14973 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14974 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14982 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14983 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14991 gcc_unreachable ();
14998 strcpy (buf
, ssep
);
14999 if (GET_MODE (operands
[0]) == SFmode
)
15000 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15002 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15006 strcpy (buf
, ssep
+ 1);
15007 if (GET_MODE (operands
[0]) == SFmode
)
15008 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15010 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15016 switch (GET_CODE (operands
[3]))
15020 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15022 rtx temp
= operands
[2];
15023 operands
[2] = operands
[1];
15024 operands
[1] = temp
;
15027 /* know operands[0] == operands[1]. */
15029 if (MEM_P (operands
[2]))
15035 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15037 if (STACK_TOP_P (operands
[0]))
15038 /* How is it that we are storing to a dead operand[2]?
15039 Well, presumably operands[1] is dead too. We can't
15040 store the result to st(0) as st(0) gets popped on this
15041 instruction. Instead store to operands[2] (which I
15042 think has to be st(1)). st(1) will be popped later.
15043 gcc <= 2.8.1 didn't have this check and generated
15044 assembly code that the Unixware assembler rejected. */
15045 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15047 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15051 if (STACK_TOP_P (operands
[0]))
15052 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15054 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15059 if (MEM_P (operands
[1]))
15065 if (MEM_P (operands
[2]))
15071 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15074 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15075 derived assemblers, confusingly reverse the direction of
15076 the operation for fsub{r} and fdiv{r} when the
15077 destination register is not st(0). The Intel assembler
15078 doesn't have this brain damage. Read !SYSV386_COMPAT to
15079 figure out what the hardware really does. */
15080 if (STACK_TOP_P (operands
[0]))
15081 p
= "{p\t%0, %2|rp\t%2, %0}";
15083 p
= "{rp\t%2, %0|p\t%0, %2}";
15085 if (STACK_TOP_P (operands
[0]))
15086 /* As above for fmul/fadd, we can't store to st(0). */
15087 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15089 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15094 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15097 if (STACK_TOP_P (operands
[0]))
15098 p
= "{rp\t%0, %1|p\t%1, %0}";
15100 p
= "{p\t%1, %0|rp\t%0, %1}";
15102 if (STACK_TOP_P (operands
[0]))
15103 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15105 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15110 if (STACK_TOP_P (operands
[0]))
15112 if (STACK_TOP_P (operands
[1]))
15113 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15115 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15118 else if (STACK_TOP_P (operands
[1]))
15121 p
= "{\t%1, %0|r\t%0, %1}";
15123 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15129 p
= "{r\t%2, %0|\t%0, %2}";
15131 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15137 gcc_unreachable ();
15144 /* Return needed mode for entity in optimize_mode_switching pass. */
15147 ix86_mode_needed (int entity
, rtx insn
)
15149 enum attr_i387_cw mode
;
15151 /* The mode UNINITIALIZED is used to store control word after a
15152 function call or ASM pattern. The mode ANY specify that function
15153 has no requirements on the control word and make no changes in the
15154 bits we are interested in. */
15157 || (NONJUMP_INSN_P (insn
)
15158 && (asm_noperands (PATTERN (insn
)) >= 0
15159 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15160 return I387_CW_UNINITIALIZED
;
15162 if (recog_memoized (insn
) < 0)
15163 return I387_CW_ANY
;
15165 mode
= get_attr_i387_cw (insn
);
15170 if (mode
== I387_CW_TRUNC
)
15175 if (mode
== I387_CW_FLOOR
)
15180 if (mode
== I387_CW_CEIL
)
15185 if (mode
== I387_CW_MASK_PM
)
15190 gcc_unreachable ();
15193 return I387_CW_ANY
;
15196 /* Output code to initialize control word copies used by trunc?f?i and
15197 rounding patterns. CURRENT_MODE is set to current control word,
15198 while NEW_MODE is set to new control word. */
15201 emit_i387_cw_initialization (int mode
)
15203 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15206 enum ix86_stack_slot slot
;
15208 rtx reg
= gen_reg_rtx (HImode
);
15210 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15211 emit_move_insn (reg
, copy_rtx (stored_mode
));
15213 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15214 || optimize_function_for_size_p (cfun
))
15218 case I387_CW_TRUNC
:
15219 /* round toward zero (truncate) */
15220 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15221 slot
= SLOT_CW_TRUNC
;
15224 case I387_CW_FLOOR
:
15225 /* round down toward -oo */
15226 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15227 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15228 slot
= SLOT_CW_FLOOR
;
15232 /* round up toward +oo */
15233 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15234 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15235 slot
= SLOT_CW_CEIL
;
15238 case I387_CW_MASK_PM
:
15239 /* mask precision exception for nearbyint() */
15240 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15241 slot
= SLOT_CW_MASK_PM
;
15245 gcc_unreachable ();
15252 case I387_CW_TRUNC
:
15253 /* round toward zero (truncate) */
15254 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15255 slot
= SLOT_CW_TRUNC
;
15258 case I387_CW_FLOOR
:
15259 /* round down toward -oo */
15260 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15261 slot
= SLOT_CW_FLOOR
;
15265 /* round up toward +oo */
15266 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15267 slot
= SLOT_CW_CEIL
;
15270 case I387_CW_MASK_PM
:
15271 /* mask precision exception for nearbyint() */
15272 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15273 slot
= SLOT_CW_MASK_PM
;
15277 gcc_unreachable ();
15281 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15283 new_mode
= assign_386_stack_local (HImode
, slot
);
15284 emit_move_insn (new_mode
, reg
);
15287 /* Output code for INSN to convert a float to a signed int. OPERANDS
15288 are the insn operands. The output may be [HSD]Imode and the input
15289 operand may be [SDX]Fmode. */
15292 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15294 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15295 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15296 int round_mode
= get_attr_i387_cw (insn
);
15298 /* Jump through a hoop or two for DImode, since the hardware has no
15299 non-popping instruction. We used to do this a different way, but
15300 that was somewhat fragile and broke with post-reload splitters. */
15301 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15302 output_asm_insn ("fld\t%y1", operands
);
15304 gcc_assert (STACK_TOP_P (operands
[1]));
15305 gcc_assert (MEM_P (operands
[0]));
15306 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15309 output_asm_insn ("fisttp%Z0\t%0", operands
);
15312 if (round_mode
!= I387_CW_ANY
)
15313 output_asm_insn ("fldcw\t%3", operands
);
15314 if (stack_top_dies
|| dimode_p
)
15315 output_asm_insn ("fistp%Z0\t%0", operands
);
15317 output_asm_insn ("fist%Z0\t%0", operands
);
15318 if (round_mode
!= I387_CW_ANY
)
15319 output_asm_insn ("fldcw\t%2", operands
);
15325 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15326 have the values zero or one, indicates the ffreep insn's operand
15327 from the OPERANDS array. */
15329 static const char *
15330 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15332 if (TARGET_USE_FFREEP
)
15333 #ifdef HAVE_AS_IX86_FFREEP
15334 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15337 static char retval
[32];
15338 int regno
= REGNO (operands
[opno
]);
15340 gcc_assert (FP_REGNO_P (regno
));
15342 regno
-= FIRST_STACK_REG
;
15344 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15349 return opno
? "fstp\t%y1" : "fstp\t%y0";
15353 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15354 should be used. UNORDERED_P is true when fucom should be used. */
15357 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15359 int stack_top_dies
;
15360 rtx cmp_op0
, cmp_op1
;
15361 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15365 cmp_op0
= operands
[0];
15366 cmp_op1
= operands
[1];
15370 cmp_op0
= operands
[1];
15371 cmp_op1
= operands
[2];
15376 if (GET_MODE (operands
[0]) == SFmode
)
15378 return "%vucomiss\t{%1, %0|%0, %1}";
15380 return "%vcomiss\t{%1, %0|%0, %1}";
15383 return "%vucomisd\t{%1, %0|%0, %1}";
15385 return "%vcomisd\t{%1, %0|%0, %1}";
15388 gcc_assert (STACK_TOP_P (cmp_op0
));
15390 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15392 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15394 if (stack_top_dies
)
15396 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15397 return output_387_ffreep (operands
, 1);
15400 return "ftst\n\tfnstsw\t%0";
15403 if (STACK_REG_P (cmp_op1
)
15405 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15406 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15408 /* If both the top of the 387 stack dies, and the other operand
15409 is also a stack register that dies, then this must be a
15410 `fcompp' float compare */
15414 /* There is no double popping fcomi variant. Fortunately,
15415 eflags is immune from the fstp's cc clobbering. */
15417 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15419 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15420 return output_387_ffreep (operands
, 0);
15425 return "fucompp\n\tfnstsw\t%0";
15427 return "fcompp\n\tfnstsw\t%0";
15432 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15434 static const char * const alt
[16] =
15436 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15437 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15438 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15439 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15441 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15442 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15446 "fcomi\t{%y1, %0|%0, %y1}",
15447 "fcomip\t{%y1, %0|%0, %y1}",
15448 "fucomi\t{%y1, %0|%0, %y1}",
15449 "fucomip\t{%y1, %0|%0, %y1}",
15460 mask
= eflags_p
<< 3;
15461 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15462 mask
|= unordered_p
<< 1;
15463 mask
|= stack_top_dies
;
15465 gcc_assert (mask
< 16);
15474 ix86_output_addr_vec_elt (FILE *file
, int value
)
15476 const char *directive
= ASM_LONG
;
15480 directive
= ASM_QUAD
;
15482 gcc_assert (!TARGET_64BIT
);
15485 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15489 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15491 const char *directive
= ASM_LONG
;
15494 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15495 directive
= ASM_QUAD
;
15497 gcc_assert (!TARGET_64BIT
);
15499 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15500 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15501 fprintf (file
, "%s%s%d-%s%d\n",
15502 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15503 else if (HAVE_AS_GOTOFF_IN_DATA
)
15504 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15506 else if (TARGET_MACHO
)
15508 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15509 machopic_output_function_base_name (file
);
15514 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15515 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15518 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15522 ix86_expand_clear (rtx dest
)
15526 /* We play register width games, which are only valid after reload. */
15527 gcc_assert (reload_completed
);
15529 /* Avoid HImode and its attendant prefix byte. */
15530 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15531 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15532 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15534 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15535 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15537 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15538 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15544 /* X is an unchanging MEM. If it is a constant pool reference, return
15545 the constant pool rtx, else NULL. */
15548 maybe_get_pool_constant (rtx x
)
15550 x
= ix86_delegitimize_address (XEXP (x
, 0));
15552 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15553 return get_pool_constant (x
);
15559 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15562 enum tls_model model
;
15567 if (GET_CODE (op1
) == SYMBOL_REF
)
15569 model
= SYMBOL_REF_TLS_MODEL (op1
);
15572 op1
= legitimize_tls_address (op1
, model
, true);
15573 op1
= force_operand (op1
, op0
);
15576 if (GET_MODE (op1
) != mode
)
15577 op1
= convert_to_mode (mode
, op1
, 1);
15579 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15580 && SYMBOL_REF_DLLIMPORT_P (op1
))
15581 op1
= legitimize_dllimport_symbol (op1
, false);
15583 else if (GET_CODE (op1
) == CONST
15584 && GET_CODE (XEXP (op1
, 0)) == PLUS
15585 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15587 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15588 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15591 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15593 tmp
= legitimize_tls_address (symbol
, model
, true);
15594 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15595 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15596 tmp
= legitimize_dllimport_symbol (symbol
, true);
15600 tmp
= force_operand (tmp
, NULL
);
15601 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15602 op0
, 1, OPTAB_DIRECT
);
15605 if (GET_MODE (tmp
) != mode
)
15606 op1
= convert_to_mode (mode
, tmp
, 1);
15610 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15611 && symbolic_operand (op1
, mode
))
15613 if (TARGET_MACHO
&& !TARGET_64BIT
)
15616 /* dynamic-no-pic */
15617 if (MACHOPIC_INDIRECT
)
15619 rtx temp
= ((reload_in_progress
15620 || ((op0
&& REG_P (op0
))
15622 ? op0
: gen_reg_rtx (Pmode
));
15623 op1
= machopic_indirect_data_reference (op1
, temp
);
15625 op1
= machopic_legitimize_pic_address (op1
, mode
,
15626 temp
== op1
? 0 : temp
);
15628 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15630 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15634 if (GET_CODE (op0
) == MEM
)
15635 op1
= force_reg (Pmode
, op1
);
15639 if (GET_CODE (temp
) != REG
)
15640 temp
= gen_reg_rtx (Pmode
);
15641 temp
= legitimize_pic_address (op1
, temp
);
15646 /* dynamic-no-pic */
15652 op1
= force_reg (mode
, op1
);
15653 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15655 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15656 op1
= legitimize_pic_address (op1
, reg
);
15659 if (GET_MODE (op1
) != mode
)
15660 op1
= convert_to_mode (mode
, op1
, 1);
15667 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15668 || !push_operand (op0
, mode
))
15670 op1
= force_reg (mode
, op1
);
15672 if (push_operand (op0
, mode
)
15673 && ! general_no_elim_operand (op1
, mode
))
15674 op1
= copy_to_mode_reg (mode
, op1
);
15676 /* Force large constants in 64bit compilation into register
15677 to get them CSEed. */
15678 if (can_create_pseudo_p ()
15679 && (mode
== DImode
) && TARGET_64BIT
15680 && immediate_operand (op1
, mode
)
15681 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15682 && !register_operand (op0
, mode
)
15684 op1
= copy_to_mode_reg (mode
, op1
);
15686 if (can_create_pseudo_p ()
15687 && FLOAT_MODE_P (mode
)
15688 && GET_CODE (op1
) == CONST_DOUBLE
)
15690 /* If we are loading a floating point constant to a register,
15691 force the value to memory now, since we'll get better code
15692 out the back end. */
15694 op1
= validize_mem (force_const_mem (mode
, op1
));
15695 if (!register_operand (op0
, mode
))
15697 rtx temp
= gen_reg_rtx (mode
);
15698 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15699 emit_move_insn (op0
, temp
);
15705 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15709 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15711 rtx op0
= operands
[0], op1
= operands
[1];
15712 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15714 /* Force constants other than zero into memory. We do not know how
15715 the instructions used to build constants modify the upper 64 bits
15716 of the register, once we have that information we may be able
15717 to handle some of them more efficiently. */
15718 if (can_create_pseudo_p ()
15719 && register_operand (op0
, mode
)
15720 && (CONSTANT_P (op1
)
15721 || (GET_CODE (op1
) == SUBREG
15722 && CONSTANT_P (SUBREG_REG (op1
))))
15723 && !standard_sse_constant_p (op1
))
15724 op1
= validize_mem (force_const_mem (mode
, op1
));
15726 /* We need to check memory alignment for SSE mode since attribute
15727 can make operands unaligned. */
15728 if (can_create_pseudo_p ()
15729 && SSE_REG_MODE_P (mode
)
15730 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15731 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15735 /* ix86_expand_vector_move_misalign() does not like constants ... */
15736 if (CONSTANT_P (op1
)
15737 || (GET_CODE (op1
) == SUBREG
15738 && CONSTANT_P (SUBREG_REG (op1
))))
15739 op1
= validize_mem (force_const_mem (mode
, op1
));
15741 /* ... nor both arguments in memory. */
15742 if (!register_operand (op0
, mode
)
15743 && !register_operand (op1
, mode
))
15744 op1
= force_reg (mode
, op1
);
15746 tmp
[0] = op0
; tmp
[1] = op1
;
15747 ix86_expand_vector_move_misalign (mode
, tmp
);
15751 /* Make operand1 a register if it isn't already. */
15752 if (can_create_pseudo_p ()
15753 && !register_operand (op0
, mode
)
15754 && !register_operand (op1
, mode
))
15756 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15760 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15763 /* Split 32-byte AVX unaligned load and store if needed. */
15766 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
15769 rtx (*extract
) (rtx
, rtx
, rtx
);
15770 rtx (*move_unaligned
) (rtx
, rtx
);
15771 enum machine_mode mode
;
15773 switch (GET_MODE (op0
))
15776 gcc_unreachable ();
15778 extract
= gen_avx_vextractf128v32qi
;
15779 move_unaligned
= gen_avx_movdqu256
;
15783 extract
= gen_avx_vextractf128v8sf
;
15784 move_unaligned
= gen_avx_movups256
;
15788 extract
= gen_avx_vextractf128v4df
;
15789 move_unaligned
= gen_avx_movupd256
;
15794 if (MEM_P (op1
) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
15796 rtx r
= gen_reg_rtx (mode
);
15797 m
= adjust_address (op1
, mode
, 0);
15798 emit_move_insn (r
, m
);
15799 m
= adjust_address (op1
, mode
, 16);
15800 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
15801 emit_move_insn (op0
, r
);
15803 else if (MEM_P (op0
) && TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
15805 m
= adjust_address (op0
, mode
, 0);
15806 emit_insn (extract (m
, op1
, const0_rtx
));
15807 m
= adjust_address (op0
, mode
, 16);
15808 emit_insn (extract (m
, op1
, const1_rtx
));
15811 emit_insn (move_unaligned (op0
, op1
));
15814 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15815 straight to ix86_expand_vector_move. */
15816 /* Code generation for scalar reg-reg moves of single and double precision data:
15817 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15821 if (x86_sse_partial_reg_dependency == true)
15826 Code generation for scalar loads of double precision data:
15827 if (x86_sse_split_regs == true)
15828 movlpd mem, reg (gas syntax)
15832 Code generation for unaligned packed loads of single precision data
15833 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15834 if (x86_sse_unaligned_move_optimal)
15837 if (x86_sse_partial_reg_dependency == true)
15849 Code generation for unaligned packed loads of double precision data
15850 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15851 if (x86_sse_unaligned_move_optimal)
15854 if (x86_sse_split_regs == true)
15867 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
15875 && GET_MODE_SIZE (mode
) == 32)
15877 switch (GET_MODE_CLASS (mode
))
15879 case MODE_VECTOR_INT
:
15881 op0
= gen_lowpart (V32QImode
, op0
);
15882 op1
= gen_lowpart (V32QImode
, op1
);
15885 case MODE_VECTOR_FLOAT
:
15886 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15890 gcc_unreachable ();
15898 /* ??? If we have typed data, then it would appear that using
15899 movdqu is the only way to get unaligned data loaded with
15901 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15903 op0
= gen_lowpart (V16QImode
, op0
);
15904 op1
= gen_lowpart (V16QImode
, op1
);
15905 /* We will eventually emit movups based on insn attributes. */
15906 emit_insn (gen_sse2_movdqu (op0
, op1
));
15908 else if (TARGET_SSE2
&& mode
== V2DFmode
)
15913 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
15914 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
15915 || optimize_function_for_size_p (cfun
))
15917 /* We will eventually emit movups based on insn attributes. */
15918 emit_insn (gen_sse2_movupd (op0
, op1
));
15922 /* When SSE registers are split into halves, we can avoid
15923 writing to the top half twice. */
15924 if (TARGET_SSE_SPLIT_REGS
)
15926 emit_clobber (op0
);
15931 /* ??? Not sure about the best option for the Intel chips.
15932 The following would seem to satisfy; the register is
15933 entirely cleared, breaking the dependency chain. We
15934 then store to the upper half, with a dependency depth
15935 of one. A rumor has it that Intel recommends two movsd
15936 followed by an unpacklpd, but this is unconfirmed. And
15937 given that the dependency depth of the unpacklpd would
15938 still be one, I'm not sure why this would be better. */
15939 zero
= CONST0_RTX (V2DFmode
);
15942 m
= adjust_address (op1
, DFmode
, 0);
15943 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
15944 m
= adjust_address (op1
, DFmode
, 8);
15945 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
15950 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
15951 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
15952 || optimize_function_for_size_p (cfun
))
15954 op0
= gen_lowpart (V4SFmode
, op0
);
15955 op1
= gen_lowpart (V4SFmode
, op1
);
15956 emit_insn (gen_sse_movups (op0
, op1
));
15960 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
15961 emit_move_insn (op0
, CONST0_RTX (mode
));
15963 emit_clobber (op0
);
15965 if (mode
!= V4SFmode
)
15966 op0
= gen_lowpart (V4SFmode
, op0
);
15968 m
= adjust_address (op1
, V2SFmode
, 0);
15969 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
15970 m
= adjust_address (op1
, V2SFmode
, 8);
15971 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
15974 else if (MEM_P (op0
))
15976 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15978 op0
= gen_lowpart (V16QImode
, op0
);
15979 op1
= gen_lowpart (V16QImode
, op1
);
15980 /* We will eventually emit movups based on insn attributes. */
15981 emit_insn (gen_sse2_movdqu (op0
, op1
));
15983 else if (TARGET_SSE2
&& mode
== V2DFmode
)
15986 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
15987 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
15988 || optimize_function_for_size_p (cfun
))
15989 /* We will eventually emit movups based on insn attributes. */
15990 emit_insn (gen_sse2_movupd (op0
, op1
));
15993 m
= adjust_address (op0
, DFmode
, 0);
15994 emit_insn (gen_sse2_storelpd (m
, op1
));
15995 m
= adjust_address (op0
, DFmode
, 8);
15996 emit_insn (gen_sse2_storehpd (m
, op1
));
16001 if (mode
!= V4SFmode
)
16002 op1
= gen_lowpart (V4SFmode
, op1
);
16005 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16006 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16007 || optimize_function_for_size_p (cfun
))
16009 op0
= gen_lowpart (V4SFmode
, op0
);
16010 emit_insn (gen_sse_movups (op0
, op1
));
16014 m
= adjust_address (op0
, V2SFmode
, 0);
16015 emit_insn (gen_sse_storelps (m
, op1
));
16016 m
= adjust_address (op0
, V2SFmode
, 8);
16017 emit_insn (gen_sse_storehps (m
, op1
));
16022 gcc_unreachable ();
16025 /* Expand a push in MODE. This is some mode for which we do not support
16026 proper push instructions, at least from the registers that we expect
16027 the value to live in. */
16030 ix86_expand_push (enum machine_mode mode
, rtx x
)
16034 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16035 GEN_INT (-GET_MODE_SIZE (mode
)),
16036 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16037 if (tmp
!= stack_pointer_rtx
)
16038 emit_move_insn (stack_pointer_rtx
, tmp
);
16040 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16042 /* When we push an operand onto stack, it has to be aligned at least
16043 at the function argument boundary. However since we don't have
16044 the argument type, we can't determine the actual argument
16046 emit_move_insn (tmp
, x
);
16049 /* Helper function of ix86_fixup_binary_operands to canonicalize
16050 operand order. Returns true if the operands should be swapped. */
16053 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16056 rtx dst
= operands
[0];
16057 rtx src1
= operands
[1];
16058 rtx src2
= operands
[2];
16060 /* If the operation is not commutative, we can't do anything. */
16061 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16064 /* Highest priority is that src1 should match dst. */
16065 if (rtx_equal_p (dst
, src1
))
16067 if (rtx_equal_p (dst
, src2
))
16070 /* Next highest priority is that immediate constants come second. */
16071 if (immediate_operand (src2
, mode
))
16073 if (immediate_operand (src1
, mode
))
16076 /* Lowest priority is that memory references should come second. */
16086 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16087 destination to use for the operation. If different from the true
16088 destination in operands[0], a copy operation will be required. */
16091 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16094 rtx dst
= operands
[0];
16095 rtx src1
= operands
[1];
16096 rtx src2
= operands
[2];
16098 /* Canonicalize operand order. */
16099 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16103 /* It is invalid to swap operands of different modes. */
16104 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16111 /* Both source operands cannot be in memory. */
16112 if (MEM_P (src1
) && MEM_P (src2
))
16114 /* Optimization: Only read from memory once. */
16115 if (rtx_equal_p (src1
, src2
))
16117 src2
= force_reg (mode
, src2
);
16121 src2
= force_reg (mode
, src2
);
16124 /* If the destination is memory, and we do not have matching source
16125 operands, do things in registers. */
16126 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16127 dst
= gen_reg_rtx (mode
);
16129 /* Source 1 cannot be a constant. */
16130 if (CONSTANT_P (src1
))
16131 src1
= force_reg (mode
, src1
);
16133 /* Source 1 cannot be a non-matching memory. */
16134 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16135 src1
= force_reg (mode
, src1
);
16137 /* Improve address combine. */
16139 && GET_MODE_CLASS (mode
) == MODE_INT
16141 src2
= force_reg (mode
, src2
);
16143 operands
[1] = src1
;
16144 operands
[2] = src2
;
16148 /* Similarly, but assume that the destination has already been
16149 set up properly. */
16152 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16153 enum machine_mode mode
, rtx operands
[])
16155 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16156 gcc_assert (dst
== operands
[0]);
16159 /* Attempt to expand a binary operator. Make the expansion closer to the
16160 actual machine, then just general_operand, which will allow 3 separate
16161 memory references (one output, two input) in a single insn. */
16164 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16167 rtx src1
, src2
, dst
, op
, clob
;
16169 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16170 src1
= operands
[1];
16171 src2
= operands
[2];
16173 /* Emit the instruction. */
16175 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16176 if (reload_in_progress
)
16178 /* Reload doesn't know about the flags register, and doesn't know that
16179 it doesn't want to clobber it. We can only do this with PLUS. */
16180 gcc_assert (code
== PLUS
);
16183 else if (reload_completed
16185 && !rtx_equal_p (dst
, src1
))
16187 /* This is going to be an LEA; avoid splitting it later. */
16192 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16193 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16196 /* Fix up the destination if needed. */
16197 if (dst
!= operands
[0])
16198 emit_move_insn (operands
[0], dst
);
16201 /* Return TRUE or FALSE depending on whether the binary operator meets the
16202 appropriate constraints. */
16205 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16208 rtx dst
= operands
[0];
16209 rtx src1
= operands
[1];
16210 rtx src2
= operands
[2];
16212 /* Both source operands cannot be in memory. */
16213 if (MEM_P (src1
) && MEM_P (src2
))
16216 /* Canonicalize operand order for commutative operators. */
16217 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16224 /* If the destination is memory, we must have a matching source operand. */
16225 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16228 /* Source 1 cannot be a constant. */
16229 if (CONSTANT_P (src1
))
16232 /* Source 1 cannot be a non-matching memory. */
16233 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16234 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16235 return (code
== AND
16238 || (TARGET_64BIT
&& mode
== DImode
))
16239 && satisfies_constraint_L (src2
));
16244 /* Attempt to expand a unary operator. Make the expansion closer to the
16245 actual machine, then just general_operand, which will allow 2 separate
16246 memory references (one output, one input) in a single insn. */
16249 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16252 int matching_memory
;
16253 rtx src
, dst
, op
, clob
;
16258 /* If the destination is memory, and we do not have matching source
16259 operands, do things in registers. */
16260 matching_memory
= 0;
16263 if (rtx_equal_p (dst
, src
))
16264 matching_memory
= 1;
16266 dst
= gen_reg_rtx (mode
);
16269 /* When source operand is memory, destination must match. */
16270 if (MEM_P (src
) && !matching_memory
)
16271 src
= force_reg (mode
, src
);
16273 /* Emit the instruction. */
16275 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16276 if (reload_in_progress
|| code
== NOT
)
16278 /* Reload doesn't know about the flags register, and doesn't know that
16279 it doesn't want to clobber it. */
16280 gcc_assert (code
== NOT
);
16285 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16286 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16289 /* Fix up the destination if needed. */
16290 if (dst
!= operands
[0])
16291 emit_move_insn (operands
[0], dst
);
16294 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16295 divisor are within the range [0-255]. */
16298 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16301 rtx end_label
, qimode_label
;
16302 rtx insn
, div
, mod
;
16303 rtx scratch
, tmp0
, tmp1
, tmp2
;
16304 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16305 rtx (*gen_zero_extend
) (rtx
, rtx
);
16306 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16311 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16312 gen_test_ccno_1
= gen_testsi_ccno_1
;
16313 gen_zero_extend
= gen_zero_extendqisi2
;
16316 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16317 gen_test_ccno_1
= gen_testdi_ccno_1
;
16318 gen_zero_extend
= gen_zero_extendqidi2
;
16321 gcc_unreachable ();
16324 end_label
= gen_label_rtx ();
16325 qimode_label
= gen_label_rtx ();
16327 scratch
= gen_reg_rtx (mode
);
16329 /* Use 8bit unsigned divimod if dividend and divisor are within
16330 the range [0-255]. */
16331 emit_move_insn (scratch
, operands
[2]);
16332 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16333 scratch
, 1, OPTAB_DIRECT
);
16334 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16335 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16336 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16337 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16338 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16340 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16341 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16342 JUMP_LABEL (insn
) = qimode_label
;
16344 /* Generate original signed/unsigned divimod. */
16345 div
= gen_divmod4_1 (operands
[0], operands
[1],
16346 operands
[2], operands
[3]);
16349 /* Branch to the end. */
16350 emit_jump_insn (gen_jump (end_label
));
16353 /* Generate 8bit unsigned divide. */
16354 emit_label (qimode_label
);
16355 /* Don't use operands[0] for result of 8bit divide since not all
16356 registers support QImode ZERO_EXTRACT. */
16357 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16358 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16359 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16360 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16364 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16365 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16369 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16370 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16373 /* Extract remainder from AH. */
16374 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16375 if (REG_P (operands
[1]))
16376 insn
= emit_move_insn (operands
[1], tmp1
);
16379 /* Need a new scratch register since the old one has result
16381 scratch
= gen_reg_rtx (mode
);
16382 emit_move_insn (scratch
, tmp1
);
16383 insn
= emit_move_insn (operands
[1], scratch
);
16385 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16387 /* Zero extend quotient from AL. */
16388 tmp1
= gen_lowpart (QImode
, tmp0
);
16389 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16390 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16392 emit_label (end_label
);
16395 #define LEA_MAX_STALL (3)
16396 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16398 /* Increase given DISTANCE in half-cycles according to
16399 dependencies between PREV and NEXT instructions.
16400 Add 1 half-cycle if there is no dependency and
16401 go to next cycle if there is some dependecy. */
16403 static unsigned int
16404 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16409 if (!prev
|| !next
)
16410 return distance
+ (distance
& 1) + 2;
16412 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16413 return distance
+ 1;
16415 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16416 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16417 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16418 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16419 return distance
+ (distance
& 1) + 2;
16421 return distance
+ 1;
16424 /* Function checks if instruction INSN defines register number
16425 REGNO1 or REGNO2. */
16428 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16433 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16434 if (DF_REF_REG_DEF_P (*def_rec
)
16435 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16436 && (regno1
== DF_REF_REGNO (*def_rec
)
16437 || regno2
== DF_REF_REGNO (*def_rec
)))
16445 /* Function checks if instruction INSN uses register number
16446 REGNO as a part of address expression. */
16449 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16453 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16454 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16460 /* Search backward for non-agu definition of register number REGNO1
16461 or register number REGNO2 in basic block starting from instruction
16462 START up to head of basic block or instruction INSN.
16464 Function puts true value into *FOUND var if definition was found
16465 and false otherwise.
16467 Distance in half-cycles between START and found instruction or head
16468 of BB is added to DISTANCE and returned. */
16471 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16472 rtx insn
, int distance
,
16473 rtx start
, bool *found
)
16475 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16483 && distance
< LEA_SEARCH_THRESHOLD
)
16485 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16487 distance
= increase_distance (prev
, next
, distance
);
16488 if (insn_defines_reg (regno1
, regno2
, prev
))
16490 if (recog_memoized (prev
) < 0
16491 || get_attr_type (prev
) != TYPE_LEA
)
16500 if (prev
== BB_HEAD (bb
))
16503 prev
= PREV_INSN (prev
);
16509 /* Search backward for non-agu definition of register number REGNO1
16510 or register number REGNO2 in INSN's basic block until
16511 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16512 2. Reach neighbour BBs boundary, or
16513 3. Reach agu definition.
16514 Returns the distance between the non-agu definition point and INSN.
16515 If no definition point, returns -1. */
16518 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16521 basic_block bb
= BLOCK_FOR_INSN (insn
);
16523 bool found
= false;
16525 if (insn
!= BB_HEAD (bb
))
16526 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16527 distance
, PREV_INSN (insn
),
16530 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16534 bool simple_loop
= false;
16536 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16539 simple_loop
= true;
16544 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16546 BB_END (bb
), &found
);
16549 int shortest_dist
= -1;
16550 bool found_in_bb
= false;
16552 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16555 = distance_non_agu_define_in_bb (regno1
, regno2
,
16561 if (shortest_dist
< 0)
16562 shortest_dist
= bb_dist
;
16563 else if (bb_dist
> 0)
16564 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16570 distance
= shortest_dist
;
16574 /* get_attr_type may modify recog data. We want to make sure
16575 that recog data is valid for instruction INSN, on which
16576 distance_non_agu_define is called. INSN is unchanged here. */
16577 extract_insn_cached (insn
);
16582 return distance
>> 1;
16585 /* Return the distance in half-cycles between INSN and the next
16586 insn that uses register number REGNO in memory address added
16587 to DISTANCE. Return -1 if REGNO0 is set.
16589 Put true value into *FOUND if register usage was found and
16591 Put true value into *REDEFINED if register redefinition was
16592 found and false otherwise. */
16595 distance_agu_use_in_bb (unsigned int regno
,
16596 rtx insn
, int distance
, rtx start
,
16597 bool *found
, bool *redefined
)
16599 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16604 *redefined
= false;
16608 && distance
< LEA_SEARCH_THRESHOLD
)
16610 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
16612 distance
= increase_distance(prev
, next
, distance
);
16613 if (insn_uses_reg_mem (regno
, next
))
16615 /* Return DISTANCE if OP0 is used in memory
16616 address in NEXT. */
16621 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
16623 /* Return -1 if OP0 is set in NEXT. */
16631 if (next
== BB_END (bb
))
16634 next
= NEXT_INSN (next
);
16640 /* Return the distance between INSN and the next insn that uses
16641 register number REGNO0 in memory address. Return -1 if no such
16642 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16645 distance_agu_use (unsigned int regno0
, rtx insn
)
16647 basic_block bb
= BLOCK_FOR_INSN (insn
);
16649 bool found
= false;
16650 bool redefined
= false;
16652 if (insn
!= BB_END (bb
))
16653 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
16655 &found
, &redefined
);
16657 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
16661 bool simple_loop
= false;
16663 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16666 simple_loop
= true;
16671 distance
= distance_agu_use_in_bb (regno0
, insn
,
16672 distance
, BB_HEAD (bb
),
16673 &found
, &redefined
);
16676 int shortest_dist
= -1;
16677 bool found_in_bb
= false;
16678 bool redefined_in_bb
= false;
16680 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16683 = distance_agu_use_in_bb (regno0
, insn
,
16684 distance
, BB_HEAD (e
->dest
),
16685 &found_in_bb
, &redefined_in_bb
);
16688 if (shortest_dist
< 0)
16689 shortest_dist
= bb_dist
;
16690 else if (bb_dist
> 0)
16691 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16697 distance
= shortest_dist
;
16701 if (!found
|| redefined
)
16704 return distance
>> 1;
16707 /* Define this macro to tune LEA priority vs ADD, it take effect when
16708 there is a dilemma of choicing LEA or ADD
16709 Negative value: ADD is more preferred than LEA
16711 Positive value: LEA is more preferred than ADD*/
16712 #define IX86_LEA_PRIORITY 0
16714 /* Return true if usage of lea INSN has performance advantage
16715 over a sequence of instructions. Instructions sequence has
16716 SPLIT_COST cycles higher latency than lea latency. */
16719 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
16720 unsigned int regno2
, unsigned int split_cost
)
16722 int dist_define
, dist_use
;
16724 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
16725 dist_use
= distance_agu_use (regno0
, insn
);
16727 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
16729 /* If there is no non AGU operand definition, no AGU
16730 operand usage and split cost is 0 then both lea
16731 and non lea variants have same priority. Currently
16732 we prefer lea for 64 bit code and non lea on 32 bit
16734 if (dist_use
< 0 && split_cost
== 0)
16735 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
16740 /* With longer definitions distance lea is more preferable.
16741 Here we change it to take into account splitting cost and
16743 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
16745 /* If there is no use in memory addess then we just check
16746 that split cost does not exceed AGU stall. */
16748 return dist_define
>= LEA_MAX_STALL
;
16750 /* If this insn has both backward non-agu dependence and forward
16751 agu dependence, the one with short distance takes effect. */
16752 return dist_define
>= dist_use
;
16755 /* Return true if it is legal to clobber flags by INSN and
16756 false otherwise. */
16759 ix86_ok_to_clobber_flags (rtx insn
)
16761 basic_block bb
= BLOCK_FOR_INSN (insn
);
16767 if (NONDEBUG_INSN_P (insn
))
16769 for (use
= DF_INSN_USES (insn
); *use
; use
++)
16770 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
16773 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
16777 if (insn
== BB_END (bb
))
16780 insn
= NEXT_INSN (insn
);
16783 live
= df_get_live_out(bb
);
16784 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
16787 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16788 move and add to avoid AGU stalls. */
16791 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
16793 unsigned int regno0
= true_regnum (operands
[0]);
16794 unsigned int regno1
= true_regnum (operands
[1]);
16795 unsigned int regno2
= true_regnum (operands
[2]);
16797 /* Check if we need to optimize. */
16798 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16801 /* Check it is correct to split here. */
16802 if (!ix86_ok_to_clobber_flags(insn
))
16805 /* We need to split only adds with non destructive
16806 destination operand. */
16807 if (regno0
== regno1
|| regno0
== regno2
)
16810 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
16813 /* Return true if we should emit lea instruction instead of mov
16817 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
16819 unsigned int regno0
;
16820 unsigned int regno1
;
16822 /* Check if we need to optimize. */
16823 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16826 /* Use lea for reg to reg moves only. */
16827 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
16830 regno0
= true_regnum (operands
[0]);
16831 regno1
= true_regnum (operands
[1]);
16833 return ix86_lea_outperforms (insn
, regno0
, regno1
, -1, 0);
16836 /* Return true if we need to split lea into a sequence of
16837 instructions to avoid AGU stalls. */
16840 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
16842 unsigned int regno0
= true_regnum (operands
[0]) ;
16843 unsigned int regno1
= -1;
16844 unsigned int regno2
= -1;
16845 unsigned int split_cost
= 0;
16846 struct ix86_address parts
;
16849 /* Check we need to optimize. */
16850 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16853 /* Check it is correct to split here. */
16854 if (!ix86_ok_to_clobber_flags(insn
))
16857 ok
= ix86_decompose_address (operands
[1], &parts
);
16860 /* We should not split into add if non legitimate pic
16861 operand is used as displacement. */
16862 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
16866 regno1
= true_regnum (parts
.base
);
16868 regno2
= true_regnum (parts
.index
);
16870 /* Compute how many cycles we will add to execution time
16871 if split lea into a sequence of instructions. */
16872 if (parts
.base
|| parts
.index
)
16874 /* Have to use mov instruction if non desctructive
16875 destination form is used. */
16876 if (regno1
!= regno0
&& regno2
!= regno0
)
16879 /* Have to add index to base if both exist. */
16880 if (parts
.base
&& parts
.index
)
16883 /* Have to use shift and adds if scale is 2 or greater. */
16884 if (parts
.scale
> 1)
16886 if (regno0
!= regno1
)
16888 else if (regno2
== regno0
)
16891 split_cost
+= parts
.scale
;
16894 /* Have to use add instruction with immediate if
16895 disp is non zero. */
16896 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16899 /* Subtract the price of lea. */
16903 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
16906 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16907 matches destination. RTX includes clobber of FLAGS_REG. */
16910 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
16915 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
16916 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16918 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16921 /* Split lea instructions into a sequence of instructions
16922 which are executed on ALU to avoid AGU stalls.
16923 It is assumed that it is allowed to clobber flags register
16924 at lea position. */
16927 ix86_split_lea_for_addr (rtx operands
[], enum machine_mode mode
)
16929 unsigned int regno0
= true_regnum (operands
[0]) ;
16930 unsigned int regno1
= INVALID_REGNUM
;
16931 unsigned int regno2
= INVALID_REGNUM
;
16932 struct ix86_address parts
;
16936 ok
= ix86_decompose_address (operands
[1], &parts
);
16941 if (GET_MODE (parts
.base
) != mode
)
16942 parts
.base
= gen_rtx_SUBREG (mode
, parts
.base
, 0);
16943 regno1
= true_regnum (parts
.base
);
16948 if (GET_MODE (parts
.index
) != mode
)
16949 parts
.index
= gen_rtx_SUBREG (mode
, parts
.index
, 0);
16950 regno2
= true_regnum (parts
.index
);
16953 if (parts
.scale
> 1)
16955 /* Case r1 = r1 + ... */
16956 if (regno1
== regno0
)
16958 /* If we have a case r1 = r1 + C * r1 then we
16959 should use multiplication which is very
16960 expensive. Assume cost model is wrong if we
16961 have such case here. */
16962 gcc_assert (regno2
!= regno0
);
16964 for (adds
= parts
.scale
; adds
> 0; adds
--)
16965 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.index
);
16969 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16970 if (regno0
!= regno2
)
16971 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.index
));
16973 /* Use shift for scaling. */
16974 ix86_emit_binop (ASHIFT
, mode
, operands
[0],
16975 GEN_INT (exact_log2 (parts
.scale
)));
16978 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.base
);
16980 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16981 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.disp
);
16984 else if (!parts
.base
&& !parts
.index
)
16986 gcc_assert(parts
.disp
);
16987 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.disp
));
16993 if (regno0
!= regno2
)
16994 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.index
));
16996 else if (!parts
.index
)
16998 if (regno0
!= regno1
)
16999 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.base
));
17003 if (regno0
== regno1
)
17005 else if (regno0
== regno2
)
17009 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.base
));
17013 ix86_emit_binop (PLUS
, mode
, operands
[0], tmp
);
17016 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17017 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.disp
);
17021 /* Return true if it is ok to optimize an ADD operation to LEA
17022 operation to avoid flag register consumation. For most processors,
17023 ADD is faster than LEA. For the processors like ATOM, if the
17024 destination register of LEA holds an actual address which will be
17025 used soon, LEA is better and otherwise ADD is better. */
17028 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17030 unsigned int regno0
= true_regnum (operands
[0]);
17031 unsigned int regno1
= true_regnum (operands
[1]);
17032 unsigned int regno2
= true_regnum (operands
[2]);
17034 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17035 if (regno0
!= regno1
&& regno0
!= regno2
)
17038 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17041 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17044 /* Return true if destination reg of SET_BODY is shift count of
17048 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17054 /* Retrieve destination of SET_BODY. */
17055 switch (GET_CODE (set_body
))
17058 set_dest
= SET_DEST (set_body
);
17059 if (!set_dest
|| !REG_P (set_dest
))
17063 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17064 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17072 /* Retrieve shift count of USE_BODY. */
17073 switch (GET_CODE (use_body
))
17076 shift_rtx
= XEXP (use_body
, 1);
17079 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17080 if (ix86_dep_by_shift_count_body (set_body
,
17081 XVECEXP (use_body
, 0, i
)))
17089 && (GET_CODE (shift_rtx
) == ASHIFT
17090 || GET_CODE (shift_rtx
) == LSHIFTRT
17091 || GET_CODE (shift_rtx
) == ASHIFTRT
17092 || GET_CODE (shift_rtx
) == ROTATE
17093 || GET_CODE (shift_rtx
) == ROTATERT
))
17095 rtx shift_count
= XEXP (shift_rtx
, 1);
17097 /* Return true if shift count is dest of SET_BODY. */
17098 if (REG_P (shift_count
)
17099 && true_regnum (set_dest
) == true_regnum (shift_count
))
17106 /* Return true if destination reg of SET_INSN is shift count of
17110 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17112 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17113 PATTERN (use_insn
));
17116 /* Return TRUE or FALSE depending on whether the unary operator meets the
17117 appropriate constraints. */
17120 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17121 enum machine_mode mode ATTRIBUTE_UNUSED
,
17122 rtx operands
[2] ATTRIBUTE_UNUSED
)
17124 /* If one of operands is memory, source and destination must match. */
17125 if ((MEM_P (operands
[0])
17126 || MEM_P (operands
[1]))
17127 && ! rtx_equal_p (operands
[0], operands
[1]))
17132 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17133 are ok, keeping in mind the possible movddup alternative. */
17136 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17138 if (MEM_P (operands
[0]))
17139 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17140 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17141 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17145 /* Post-reload splitter for converting an SF or DFmode value in an
17146 SSE register into an unsigned SImode. */
17149 ix86_split_convert_uns_si_sse (rtx operands
[])
17151 enum machine_mode vecmode
;
17152 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17154 large
= operands
[1];
17155 zero_or_two31
= operands
[2];
17156 input
= operands
[3];
17157 two31
= operands
[4];
17158 vecmode
= GET_MODE (large
);
17159 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17161 /* Load up the value into the low element. We must ensure that the other
17162 elements are valid floats -- zero is the easiest such value. */
17165 if (vecmode
== V4SFmode
)
17166 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17168 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17172 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17173 emit_move_insn (value
, CONST0_RTX (vecmode
));
17174 if (vecmode
== V4SFmode
)
17175 emit_insn (gen_sse_movss (value
, value
, input
));
17177 emit_insn (gen_sse2_movsd (value
, value
, input
));
17180 emit_move_insn (large
, two31
);
17181 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17183 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17184 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17186 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17187 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17189 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17190 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17192 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17193 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17195 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17196 if (vecmode
== V4SFmode
)
17197 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17199 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17202 emit_insn (gen_xorv4si3 (value
, value
, large
));
17205 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17206 Expects the 64-bit DImode to be supplied in a pair of integral
17207 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17208 -mfpmath=sse, !optimize_size only. */
17211 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17213 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17214 rtx int_xmm
, fp_xmm
;
17215 rtx biases
, exponents
;
17218 int_xmm
= gen_reg_rtx (V4SImode
);
17219 if (TARGET_INTER_UNIT_MOVES
)
17220 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17221 else if (TARGET_SSE_SPLIT_REGS
)
17223 emit_clobber (int_xmm
);
17224 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17228 x
= gen_reg_rtx (V2DImode
);
17229 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17230 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17233 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17234 gen_rtvec (4, GEN_INT (0x43300000UL
),
17235 GEN_INT (0x45300000UL
),
17236 const0_rtx
, const0_rtx
));
17237 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17239 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17240 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17242 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17243 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17244 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17245 (0x1.0p84 + double(fp_value_hi_xmm)).
17246 Note these exponents differ by 32. */
17248 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17250 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17251 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17252 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17253 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17254 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17255 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17256 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17257 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17258 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17260 /* Add the upper and lower DFmode values together. */
17262 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17265 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17266 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17267 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17270 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17273 /* Not used, but eases macroization of patterns. */
17275 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17276 rtx input ATTRIBUTE_UNUSED
)
17278 gcc_unreachable ();
17281 /* Convert an unsigned SImode value into a DFmode. Only currently used
17282 for SSE, but applicable anywhere. */
17285 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17287 REAL_VALUE_TYPE TWO31r
;
17290 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17291 NULL
, 1, OPTAB_DIRECT
);
17293 fp
= gen_reg_rtx (DFmode
);
17294 emit_insn (gen_floatsidf2 (fp
, x
));
17296 real_ldexp (&TWO31r
, &dconst1
, 31);
17297 x
= const_double_from_real_value (TWO31r
, DFmode
);
17299 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17301 emit_move_insn (target
, x
);
17304 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17305 32-bit mode; otherwise we have a direct convert instruction. */
17308 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17310 REAL_VALUE_TYPE TWO32r
;
17311 rtx fp_lo
, fp_hi
, x
;
17313 fp_lo
= gen_reg_rtx (DFmode
);
17314 fp_hi
= gen_reg_rtx (DFmode
);
17316 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17318 real_ldexp (&TWO32r
, &dconst1
, 32);
17319 x
= const_double_from_real_value (TWO32r
, DFmode
);
17320 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17322 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17324 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17327 emit_move_insn (target
, x
);
17330 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17331 For x86_32, -mfpmath=sse, !optimize_size only. */
17333 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17335 REAL_VALUE_TYPE ONE16r
;
17336 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17338 real_ldexp (&ONE16r
, &dconst1
, 16);
17339 x
= const_double_from_real_value (ONE16r
, SFmode
);
17340 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17341 NULL
, 0, OPTAB_DIRECT
);
17342 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17343 NULL
, 0, OPTAB_DIRECT
);
17344 fp_hi
= gen_reg_rtx (SFmode
);
17345 fp_lo
= gen_reg_rtx (SFmode
);
17346 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17347 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17348 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17350 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17352 if (!rtx_equal_p (target
, fp_hi
))
17353 emit_move_insn (target
, fp_hi
);
17356 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17357 a vector of unsigned ints VAL to vector of floats TARGET. */
17360 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17363 REAL_VALUE_TYPE TWO16r
;
17364 enum machine_mode intmode
= GET_MODE (val
);
17365 enum machine_mode fltmode
= GET_MODE (target
);
17366 rtx (*cvt
) (rtx
, rtx
);
17368 if (intmode
== V4SImode
)
17369 cvt
= gen_floatv4siv4sf2
;
17371 cvt
= gen_floatv8siv8sf2
;
17372 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17373 tmp
[0] = force_reg (intmode
, tmp
[0]);
17374 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17376 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17377 NULL_RTX
, 1, OPTAB_DIRECT
);
17378 tmp
[3] = gen_reg_rtx (fltmode
);
17379 emit_insn (cvt (tmp
[3], tmp
[1]));
17380 tmp
[4] = gen_reg_rtx (fltmode
);
17381 emit_insn (cvt (tmp
[4], tmp
[2]));
17382 real_ldexp (&TWO16r
, &dconst1
, 16);
17383 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17384 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17385 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17387 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17389 if (tmp
[7] != target
)
17390 emit_move_insn (target
, tmp
[7]);
17393 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17394 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17395 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17396 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17399 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17401 REAL_VALUE_TYPE TWO31r
;
17402 rtx two31r
, tmp
[4];
17403 enum machine_mode mode
= GET_MODE (val
);
17404 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17405 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17406 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17409 for (i
= 0; i
< 3; i
++)
17410 tmp
[i
] = gen_reg_rtx (mode
);
17411 real_ldexp (&TWO31r
, &dconst1
, 31);
17412 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17413 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17414 two31r
= force_reg (mode
, two31r
);
17417 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17418 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17419 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17420 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17421 default: gcc_unreachable ();
17423 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17424 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17425 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17427 if (intmode
== V4SImode
|| TARGET_AVX2
)
17428 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17429 gen_lowpart (intmode
, tmp
[0]),
17430 GEN_INT (31), NULL_RTX
, 0,
17434 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17435 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17436 *xorp
= expand_simple_binop (intmode
, AND
,
17437 gen_lowpart (intmode
, tmp
[0]),
17438 two31
, NULL_RTX
, 0,
17441 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17445 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17446 then replicate the value for all elements of the vector
17450 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17454 enum machine_mode scalar_mode
;
17471 n_elt
= GET_MODE_NUNITS (mode
);
17472 v
= rtvec_alloc (n_elt
);
17473 scalar_mode
= GET_MODE_INNER (mode
);
17475 RTVEC_ELT (v
, 0) = value
;
17477 for (i
= 1; i
< n_elt
; ++i
)
17478 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17480 return gen_rtx_CONST_VECTOR (mode
, v
);
17483 gcc_unreachable ();
17487 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17488 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17489 for an SSE register. If VECT is true, then replicate the mask for
17490 all elements of the vector register. If INVERT is true, then create
17491 a mask excluding the sign bit. */
17494 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17496 enum machine_mode vec_mode
, imode
;
17497 HOST_WIDE_INT hi
, lo
;
17502 /* Find the sign bit, sign extended to 2*HWI. */
17510 mode
= GET_MODE_INNER (mode
);
17512 lo
= 0x80000000, hi
= lo
< 0;
17520 mode
= GET_MODE_INNER (mode
);
17522 if (HOST_BITS_PER_WIDE_INT
>= 64)
17523 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17525 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17530 vec_mode
= VOIDmode
;
17531 if (HOST_BITS_PER_WIDE_INT
>= 64)
17534 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
17541 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17545 lo
= ~lo
, hi
= ~hi
;
17551 mask
= immed_double_const (lo
, hi
, imode
);
17553 vec
= gen_rtvec (2, v
, mask
);
17554 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
17555 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
17562 gcc_unreachable ();
17566 lo
= ~lo
, hi
= ~hi
;
17568 /* Force this value into the low part of a fp vector constant. */
17569 mask
= immed_double_const (lo
, hi
, imode
);
17570 mask
= gen_lowpart (mode
, mask
);
17572 if (vec_mode
== VOIDmode
)
17573 return force_reg (mode
, mask
);
17575 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
17576 return force_reg (vec_mode
, v
);
17579 /* Generate code for floating point ABS or NEG. */
17582 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
17585 rtx mask
, set
, dst
, src
;
17586 bool use_sse
= false;
17587 bool vector_mode
= VECTOR_MODE_P (mode
);
17588 enum machine_mode vmode
= mode
;
17592 else if (mode
== TFmode
)
17594 else if (TARGET_SSE_MATH
)
17596 use_sse
= SSE_FLOAT_MODE_P (mode
);
17597 if (mode
== SFmode
)
17599 else if (mode
== DFmode
)
17603 /* NEG and ABS performed with SSE use bitwise mask operations.
17604 Create the appropriate mask now. */
17606 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
17613 set
= gen_rtx_fmt_e (code
, mode
, src
);
17614 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
17621 use
= gen_rtx_USE (VOIDmode
, mask
);
17623 par
= gen_rtvec (2, set
, use
);
17626 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17627 par
= gen_rtvec (3, set
, use
, clob
);
17629 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
17635 /* Expand a copysign operation. Special case operand 0 being a constant. */
17638 ix86_expand_copysign (rtx operands
[])
17640 enum machine_mode mode
, vmode
;
17641 rtx dest
, op0
, op1
, mask
, nmask
;
17643 dest
= operands
[0];
17647 mode
= GET_MODE (dest
);
17649 if (mode
== SFmode
)
17651 else if (mode
== DFmode
)
17656 if (GET_CODE (op0
) == CONST_DOUBLE
)
17658 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
17660 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
17661 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
17663 if (mode
== SFmode
|| mode
== DFmode
)
17665 if (op0
== CONST0_RTX (mode
))
17666 op0
= CONST0_RTX (vmode
);
17669 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
17671 op0
= force_reg (vmode
, v
);
17674 else if (op0
!= CONST0_RTX (mode
))
17675 op0
= force_reg (mode
, op0
);
17677 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17679 if (mode
== SFmode
)
17680 copysign_insn
= gen_copysignsf3_const
;
17681 else if (mode
== DFmode
)
17682 copysign_insn
= gen_copysigndf3_const
;
17684 copysign_insn
= gen_copysigntf3_const
;
17686 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
17690 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
17692 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
17693 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17695 if (mode
== SFmode
)
17696 copysign_insn
= gen_copysignsf3_var
;
17697 else if (mode
== DFmode
)
17698 copysign_insn
= gen_copysigndf3_var
;
17700 copysign_insn
= gen_copysigntf3_var
;
17702 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
17706 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17707 be a constant, and so has already been expanded into a vector constant. */
17710 ix86_split_copysign_const (rtx operands
[])
17712 enum machine_mode mode
, vmode
;
17713 rtx dest
, op0
, mask
, x
;
17715 dest
= operands
[0];
17717 mask
= operands
[3];
17719 mode
= GET_MODE (dest
);
17720 vmode
= GET_MODE (mask
);
17722 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
17723 x
= gen_rtx_AND (vmode
, dest
, mask
);
17724 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17726 if (op0
!= CONST0_RTX (vmode
))
17728 x
= gen_rtx_IOR (vmode
, dest
, op0
);
17729 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17733 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17734 so we have to do two masks. */
17737 ix86_split_copysign_var (rtx operands
[])
17739 enum machine_mode mode
, vmode
;
17740 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
17742 dest
= operands
[0];
17743 scratch
= operands
[1];
17746 nmask
= operands
[4];
17747 mask
= operands
[5];
17749 mode
= GET_MODE (dest
);
17750 vmode
= GET_MODE (mask
);
17752 if (rtx_equal_p (op0
, op1
))
17754 /* Shouldn't happen often (it's useless, obviously), but when it does
17755 we'd generate incorrect code if we continue below. */
17756 emit_move_insn (dest
, op0
);
17760 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
17762 gcc_assert (REGNO (op1
) == REGNO (scratch
));
17764 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17765 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17768 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17769 x
= gen_rtx_NOT (vmode
, dest
);
17770 x
= gen_rtx_AND (vmode
, x
, op0
);
17771 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17775 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
17777 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17779 else /* alternative 2,4 */
17781 gcc_assert (REGNO (mask
) == REGNO (scratch
));
17782 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
17783 x
= gen_rtx_AND (vmode
, scratch
, op1
);
17785 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17787 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
17789 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17790 x
= gen_rtx_AND (vmode
, dest
, nmask
);
17792 else /* alternative 3,4 */
17794 gcc_assert (REGNO (nmask
) == REGNO (dest
));
17796 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17797 x
= gen_rtx_AND (vmode
, dest
, op0
);
17799 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17802 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
17803 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17806 /* Return TRUE or FALSE depending on whether the first SET in INSN
17807 has source and destination with matching CC modes, and that the
17808 CC mode is at least as constrained as REQ_MODE. */
17811 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
17814 enum machine_mode set_mode
;
17816 set
= PATTERN (insn
);
17817 if (GET_CODE (set
) == PARALLEL
)
17818 set
= XVECEXP (set
, 0, 0);
17819 gcc_assert (GET_CODE (set
) == SET
);
17820 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
17822 set_mode
= GET_MODE (SET_DEST (set
));
17826 if (req_mode
!= CCNOmode
17827 && (req_mode
!= CCmode
17828 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
17832 if (req_mode
== CCGCmode
)
17836 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
17840 if (req_mode
== CCZmode
)
17850 if (set_mode
!= req_mode
)
17855 gcc_unreachable ();
17858 return GET_MODE (SET_SRC (set
)) == set_mode
;
17861 /* Generate insn patterns to do an integer compare of OPERANDS. */
17864 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
17866 enum machine_mode cmpmode
;
17869 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
17870 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
17872 /* This is very simple, but making the interface the same as in the
17873 FP case makes the rest of the code easier. */
17874 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
17875 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
17877 /* Return the test that should be put into the flags user, i.e.
17878 the bcc, scc, or cmov instruction. */
17879 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
17882 /* Figure out whether to use ordered or unordered fp comparisons.
17883 Return the appropriate mode to use. */
17886 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
17888 /* ??? In order to make all comparisons reversible, we do all comparisons
17889 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17890 all forms trapping and nontrapping comparisons, we can make inequality
17891 comparisons trapping again, since it results in better code when using
17892 FCOM based compares. */
17893 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
17897 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
17899 enum machine_mode mode
= GET_MODE (op0
);
17901 if (SCALAR_FLOAT_MODE_P (mode
))
17903 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
17904 return ix86_fp_compare_mode (code
);
17909 /* Only zero flag is needed. */
17910 case EQ
: /* ZF=0 */
17911 case NE
: /* ZF!=0 */
17913 /* Codes needing carry flag. */
17914 case GEU
: /* CF=0 */
17915 case LTU
: /* CF=1 */
17916 /* Detect overflow checks. They need just the carry flag. */
17917 if (GET_CODE (op0
) == PLUS
17918 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17922 case GTU
: /* CF=0 & ZF=0 */
17923 case LEU
: /* CF=1 | ZF=1 */
17924 /* Detect overflow checks. They need just the carry flag. */
17925 if (GET_CODE (op0
) == MINUS
17926 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17930 /* Codes possibly doable only with sign flag when
17931 comparing against zero. */
17932 case GE
: /* SF=OF or SF=0 */
17933 case LT
: /* SF<>OF or SF=1 */
17934 if (op1
== const0_rtx
)
17937 /* For other cases Carry flag is not required. */
17939 /* Codes doable only with sign flag when comparing
17940 against zero, but we miss jump instruction for it
17941 so we need to use relational tests against overflow
17942 that thus needs to be zero. */
17943 case GT
: /* ZF=0 & SF=OF */
17944 case LE
: /* ZF=1 | SF<>OF */
17945 if (op1
== const0_rtx
)
17949 /* strcmp pattern do (use flags) and combine may ask us for proper
17954 gcc_unreachable ();
17958 /* Return the fixed registers used for condition codes. */
17961 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
17968 /* If two condition code modes are compatible, return a condition code
17969 mode which is compatible with both. Otherwise, return
17972 static enum machine_mode
17973 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
17978 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
17981 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
17982 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
17985 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
17987 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
17993 gcc_unreachable ();
18023 /* These are only compatible with themselves, which we already
18030 /* Return a comparison we can do and that it is equivalent to
18031 swap_condition (code) apart possibly from orderedness.
18032 But, never change orderedness if TARGET_IEEE_FP, returning
18033 UNKNOWN in that case if necessary. */
18035 static enum rtx_code
18036 ix86_fp_swap_condition (enum rtx_code code
)
18040 case GT
: /* GTU - CF=0 & ZF=0 */
18041 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18042 case GE
: /* GEU - CF=0 */
18043 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18044 case UNLT
: /* LTU - CF=1 */
18045 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18046 case UNLE
: /* LEU - CF=1 | ZF=1 */
18047 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18049 return swap_condition (code
);
18053 /* Return cost of comparison CODE using the best strategy for performance.
18054 All following functions do use number of instructions as a cost metrics.
18055 In future this should be tweaked to compute bytes for optimize_size and
18056 take into account performance of various instructions on various CPUs. */
18059 ix86_fp_comparison_cost (enum rtx_code code
)
18063 /* The cost of code using bit-twiddling on %ah. */
18080 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18084 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18087 gcc_unreachable ();
18090 switch (ix86_fp_comparison_strategy (code
))
18092 case IX86_FPCMP_COMI
:
18093 return arith_cost
> 4 ? 3 : 2;
18094 case IX86_FPCMP_SAHF
:
18095 return arith_cost
> 4 ? 4 : 3;
18101 /* Return strategy to use for floating-point. We assume that fcomi is always
18102 preferrable where available, since that is also true when looking at size
18103 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18105 enum ix86_fpcmp_strategy
18106 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18108 /* Do fcomi/sahf based test when profitable. */
18111 return IX86_FPCMP_COMI
;
18113 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18114 return IX86_FPCMP_SAHF
;
18116 return IX86_FPCMP_ARITH
;
18119 /* Swap, force into registers, or otherwise massage the two operands
18120 to a fp comparison. The operands are updated in place; the new
18121 comparison code is returned. */
18123 static enum rtx_code
18124 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18126 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18127 rtx op0
= *pop0
, op1
= *pop1
;
18128 enum machine_mode op_mode
= GET_MODE (op0
);
18129 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18131 /* All of the unordered compare instructions only work on registers.
18132 The same is true of the fcomi compare instructions. The XFmode
18133 compare instructions require registers except when comparing
18134 against zero or when converting operand 1 from fixed point to
18138 && (fpcmp_mode
== CCFPUmode
18139 || (op_mode
== XFmode
18140 && ! (standard_80387_constant_p (op0
) == 1
18141 || standard_80387_constant_p (op1
) == 1)
18142 && GET_CODE (op1
) != FLOAT
)
18143 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18145 op0
= force_reg (op_mode
, op0
);
18146 op1
= force_reg (op_mode
, op1
);
18150 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18151 things around if they appear profitable, otherwise force op0
18152 into a register. */
18154 if (standard_80387_constant_p (op0
) == 0
18156 && ! (standard_80387_constant_p (op1
) == 0
18159 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18160 if (new_code
!= UNKNOWN
)
18163 tmp
= op0
, op0
= op1
, op1
= tmp
;
18169 op0
= force_reg (op_mode
, op0
);
18171 if (CONSTANT_P (op1
))
18173 int tmp
= standard_80387_constant_p (op1
);
18175 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18179 op1
= force_reg (op_mode
, op1
);
18182 op1
= force_reg (op_mode
, op1
);
18186 /* Try to rearrange the comparison to make it cheaper. */
18187 if (ix86_fp_comparison_cost (code
)
18188 > ix86_fp_comparison_cost (swap_condition (code
))
18189 && (REG_P (op1
) || can_create_pseudo_p ()))
18192 tmp
= op0
, op0
= op1
, op1
= tmp
;
18193 code
= swap_condition (code
);
18195 op0
= force_reg (op_mode
, op0
);
18203 /* Convert comparison codes we use to represent FP comparison to integer
18204 code that will result in proper branch. Return UNKNOWN if no such code
18208 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18237 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18240 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18242 enum machine_mode fpcmp_mode
, intcmp_mode
;
18245 fpcmp_mode
= ix86_fp_compare_mode (code
);
18246 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18248 /* Do fcomi/sahf based test when profitable. */
18249 switch (ix86_fp_comparison_strategy (code
))
18251 case IX86_FPCMP_COMI
:
18252 intcmp_mode
= fpcmp_mode
;
18253 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18254 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18259 case IX86_FPCMP_SAHF
:
18260 intcmp_mode
= fpcmp_mode
;
18261 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18262 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18266 scratch
= gen_reg_rtx (HImode
);
18267 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18268 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18271 case IX86_FPCMP_ARITH
:
18272 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18273 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18274 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18276 scratch
= gen_reg_rtx (HImode
);
18277 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18279 /* In the unordered case, we have to check C2 for NaN's, which
18280 doesn't happen to work out to anything nice combination-wise.
18281 So do some bit twiddling on the value we've got in AH to come
18282 up with an appropriate set of condition codes. */
18284 intcmp_mode
= CCNOmode
;
18289 if (code
== GT
|| !TARGET_IEEE_FP
)
18291 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18296 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18297 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18298 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18299 intcmp_mode
= CCmode
;
18305 if (code
== LT
&& TARGET_IEEE_FP
)
18307 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18308 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18309 intcmp_mode
= CCmode
;
18314 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18320 if (code
== GE
|| !TARGET_IEEE_FP
)
18322 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18327 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18328 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18334 if (code
== LE
&& TARGET_IEEE_FP
)
18336 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18337 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18338 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18339 intcmp_mode
= CCmode
;
18344 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18350 if (code
== EQ
&& TARGET_IEEE_FP
)
18352 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18353 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18354 intcmp_mode
= CCmode
;
18359 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18365 if (code
== NE
&& TARGET_IEEE_FP
)
18367 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18368 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18374 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18380 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18384 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18389 gcc_unreachable ();
18397 /* Return the test that should be put into the flags user, i.e.
18398 the bcc, scc, or cmov instruction. */
18399 return gen_rtx_fmt_ee (code
, VOIDmode
,
18400 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18405 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18409 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18410 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18412 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18414 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18415 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18418 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18424 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18426 enum machine_mode mode
= GET_MODE (op0
);
18438 tmp
= ix86_expand_compare (code
, op0
, op1
);
18439 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18440 gen_rtx_LABEL_REF (VOIDmode
, label
),
18442 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18449 /* Expand DImode branch into multiple compare+branch. */
18451 rtx lo
[2], hi
[2], label2
;
18452 enum rtx_code code1
, code2
, code3
;
18453 enum machine_mode submode
;
18455 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18457 tmp
= op0
, op0
= op1
, op1
= tmp
;
18458 code
= swap_condition (code
);
18461 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18462 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18464 submode
= mode
== DImode
? SImode
: DImode
;
18466 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18467 avoid two branches. This costs one extra insn, so disable when
18468 optimizing for size. */
18470 if ((code
== EQ
|| code
== NE
)
18471 && (!optimize_insn_for_size_p ()
18472 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18477 if (hi
[1] != const0_rtx
)
18478 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18479 NULL_RTX
, 0, OPTAB_WIDEN
);
18482 if (lo
[1] != const0_rtx
)
18483 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18484 NULL_RTX
, 0, OPTAB_WIDEN
);
18486 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18487 NULL_RTX
, 0, OPTAB_WIDEN
);
18489 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18493 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18494 op1 is a constant and the low word is zero, then we can just
18495 examine the high word. Similarly for low word -1 and
18496 less-or-equal-than or greater-than. */
18498 if (CONST_INT_P (hi
[1]))
18501 case LT
: case LTU
: case GE
: case GEU
:
18502 if (lo
[1] == const0_rtx
)
18504 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18508 case LE
: case LEU
: case GT
: case GTU
:
18509 if (lo
[1] == constm1_rtx
)
18511 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18519 /* Otherwise, we need two or three jumps. */
18521 label2
= gen_label_rtx ();
18524 code2
= swap_condition (code
);
18525 code3
= unsigned_condition (code
);
18529 case LT
: case GT
: case LTU
: case GTU
:
18532 case LE
: code1
= LT
; code2
= GT
; break;
18533 case GE
: code1
= GT
; code2
= LT
; break;
18534 case LEU
: code1
= LTU
; code2
= GTU
; break;
18535 case GEU
: code1
= GTU
; code2
= LTU
; break;
18537 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
18538 case NE
: code2
= UNKNOWN
; break;
18541 gcc_unreachable ();
18546 * if (hi(a) < hi(b)) goto true;
18547 * if (hi(a) > hi(b)) goto false;
18548 * if (lo(a) < lo(b)) goto true;
18552 if (code1
!= UNKNOWN
)
18553 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
18554 if (code2
!= UNKNOWN
)
18555 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
18557 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
18559 if (code2
!= UNKNOWN
)
18560 emit_label (label2
);
18565 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
18570 /* Split branch based on floating point condition. */
18572 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
18573 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
18578 if (target2
!= pc_rtx
)
18581 code
= reverse_condition_maybe_unordered (code
);
18586 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
18589 /* Remove pushed operand from stack. */
18591 ix86_free_from_memory (GET_MODE (pushed
));
18593 i
= emit_jump_insn (gen_rtx_SET
18595 gen_rtx_IF_THEN_ELSE (VOIDmode
,
18596 condition
, target1
, target2
)));
18597 if (split_branch_probability
>= 0)
18598 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
18602 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
18606 gcc_assert (GET_MODE (dest
) == QImode
);
18608 ret
= ix86_expand_compare (code
, op0
, op1
);
18609 PUT_MODE (ret
, QImode
);
18610 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
18613 /* Expand comparison setting or clearing carry flag. Return true when
18614 successful and set pop for the operation. */
18616 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
18618 enum machine_mode mode
=
18619 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
18621 /* Do not handle double-mode compares that go through special path. */
18622 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
18625 if (SCALAR_FLOAT_MODE_P (mode
))
18627 rtx compare_op
, compare_seq
;
18629 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18631 /* Shortcut: following common codes never translate
18632 into carry flag compares. */
18633 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
18634 || code
== ORDERED
|| code
== UNORDERED
)
18637 /* These comparisons require zero flag; swap operands so they won't. */
18638 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
18639 && !TARGET_IEEE_FP
)
18644 code
= swap_condition (code
);
18647 /* Try to expand the comparison and verify that we end up with
18648 carry flag based comparison. This fails to be true only when
18649 we decide to expand comparison using arithmetic that is not
18650 too common scenario. */
18652 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18653 compare_seq
= get_insns ();
18656 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
18657 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
18658 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
18660 code
= GET_CODE (compare_op
);
18662 if (code
!= LTU
&& code
!= GEU
)
18665 emit_insn (compare_seq
);
18670 if (!INTEGRAL_MODE_P (mode
))
18679 /* Convert a==0 into (unsigned)a<1. */
18682 if (op1
!= const0_rtx
)
18685 code
= (code
== EQ
? LTU
: GEU
);
18688 /* Convert a>b into b<a or a>=b-1. */
18691 if (CONST_INT_P (op1
))
18693 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
18694 /* Bail out on overflow. We still can swap operands but that
18695 would force loading of the constant into register. */
18696 if (op1
== const0_rtx
18697 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
18699 code
= (code
== GTU
? GEU
: LTU
);
18706 code
= (code
== GTU
? LTU
: GEU
);
18710 /* Convert a>=0 into (unsigned)a<0x80000000. */
18713 if (mode
== DImode
|| op1
!= const0_rtx
)
18715 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18716 code
= (code
== LT
? GEU
: LTU
);
18720 if (mode
== DImode
|| op1
!= constm1_rtx
)
18722 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18723 code
= (code
== LE
? GEU
: LTU
);
18729 /* Swapping operands may cause constant to appear as first operand. */
18730 if (!nonimmediate_operand (op0
, VOIDmode
))
18732 if (!can_create_pseudo_p ())
18734 op0
= force_reg (mode
, op0
);
18736 *pop
= ix86_expand_compare (code
, op0
, op1
);
18737 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
18742 ix86_expand_int_movcc (rtx operands
[])
18744 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
18745 rtx compare_seq
, compare_op
;
18746 enum machine_mode mode
= GET_MODE (operands
[0]);
18747 bool sign_bit_compare_p
= false;
18748 rtx op0
= XEXP (operands
[1], 0);
18749 rtx op1
= XEXP (operands
[1], 1);
18751 if (GET_MODE (op0
) == TImode
18752 || (GET_MODE (op0
) == DImode
18757 compare_op
= ix86_expand_compare (code
, op0
, op1
);
18758 compare_seq
= get_insns ();
18761 compare_code
= GET_CODE (compare_op
);
18763 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
18764 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
18765 sign_bit_compare_p
= true;
18767 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18768 HImode insns, we'd be swallowed in word prefix ops. */
18770 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
18771 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
18772 && CONST_INT_P (operands
[2])
18773 && CONST_INT_P (operands
[3]))
18775 rtx out
= operands
[0];
18776 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
18777 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
18778 HOST_WIDE_INT diff
;
18781 /* Sign bit compares are better done using shifts than we do by using
18783 if (sign_bit_compare_p
18784 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
18786 /* Detect overlap between destination and compare sources. */
18789 if (!sign_bit_compare_p
)
18792 bool fpcmp
= false;
18794 compare_code
= GET_CODE (compare_op
);
18796 flags
= XEXP (compare_op
, 0);
18798 if (GET_MODE (flags
) == CCFPmode
18799 || GET_MODE (flags
) == CCFPUmode
)
18803 = ix86_fp_compare_code_to_integer (compare_code
);
18806 /* To simplify rest of code, restrict to the GEU case. */
18807 if (compare_code
== LTU
)
18809 HOST_WIDE_INT tmp
= ct
;
18812 compare_code
= reverse_condition (compare_code
);
18813 code
= reverse_condition (code
);
18818 PUT_CODE (compare_op
,
18819 reverse_condition_maybe_unordered
18820 (GET_CODE (compare_op
)));
18822 PUT_CODE (compare_op
,
18823 reverse_condition (GET_CODE (compare_op
)));
18827 if (reg_overlap_mentioned_p (out
, op0
)
18828 || reg_overlap_mentioned_p (out
, op1
))
18829 tmp
= gen_reg_rtx (mode
);
18831 if (mode
== DImode
)
18832 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
18834 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
18835 flags
, compare_op
));
18839 if (code
== GT
|| code
== GE
)
18840 code
= reverse_condition (code
);
18843 HOST_WIDE_INT tmp
= ct
;
18848 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
18861 tmp
= expand_simple_binop (mode
, PLUS
,
18863 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18874 tmp
= expand_simple_binop (mode
, IOR
,
18876 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18878 else if (diff
== -1 && ct
)
18888 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18890 tmp
= expand_simple_binop (mode
, PLUS
,
18891 copy_rtx (tmp
), GEN_INT (cf
),
18892 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18900 * andl cf - ct, dest
18910 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18913 tmp
= expand_simple_binop (mode
, AND
,
18915 gen_int_mode (cf
- ct
, mode
),
18916 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18918 tmp
= expand_simple_binop (mode
, PLUS
,
18919 copy_rtx (tmp
), GEN_INT (ct
),
18920 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18923 if (!rtx_equal_p (tmp
, out
))
18924 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
18931 enum machine_mode cmp_mode
= GET_MODE (op0
);
18934 tmp
= ct
, ct
= cf
, cf
= tmp
;
18937 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
18939 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
18941 /* We may be reversing unordered compare to normal compare, that
18942 is not valid in general (we may convert non-trapping condition
18943 to trapping one), however on i386 we currently emit all
18944 comparisons unordered. */
18945 compare_code
= reverse_condition_maybe_unordered (compare_code
);
18946 code
= reverse_condition_maybe_unordered (code
);
18950 compare_code
= reverse_condition (compare_code
);
18951 code
= reverse_condition (code
);
18955 compare_code
= UNKNOWN
;
18956 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
18957 && CONST_INT_P (op1
))
18959 if (op1
== const0_rtx
18960 && (code
== LT
|| code
== GE
))
18961 compare_code
= code
;
18962 else if (op1
== constm1_rtx
)
18966 else if (code
== GT
)
18971 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18972 if (compare_code
!= UNKNOWN
18973 && GET_MODE (op0
) == GET_MODE (out
)
18974 && (cf
== -1 || ct
== -1))
18976 /* If lea code below could be used, only optimize
18977 if it results in a 2 insn sequence. */
18979 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18980 || diff
== 3 || diff
== 5 || diff
== 9)
18981 || (compare_code
== LT
&& ct
== -1)
18982 || (compare_code
== GE
&& cf
== -1))
18985 * notl op1 (if necessary)
18993 code
= reverse_condition (code
);
18996 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
18998 out
= expand_simple_binop (mode
, IOR
,
19000 out
, 1, OPTAB_DIRECT
);
19001 if (out
!= operands
[0])
19002 emit_move_insn (operands
[0], out
);
19009 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19010 || diff
== 3 || diff
== 5 || diff
== 9)
19011 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19013 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19019 * lea cf(dest*(ct-cf)),dest
19023 * This also catches the degenerate setcc-only case.
19029 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19032 /* On x86_64 the lea instruction operates on Pmode, so we need
19033 to get arithmetics done in proper mode to match. */
19035 tmp
= copy_rtx (out
);
19039 out1
= copy_rtx (out
);
19040 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19044 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19050 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19053 if (!rtx_equal_p (tmp
, out
))
19056 out
= force_operand (tmp
, copy_rtx (out
));
19058 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19060 if (!rtx_equal_p (out
, operands
[0]))
19061 emit_move_insn (operands
[0], copy_rtx (out
));
19067 * General case: Jumpful:
19068 * xorl dest,dest cmpl op1, op2
19069 * cmpl op1, op2 movl ct, dest
19070 * setcc dest jcc 1f
19071 * decl dest movl cf, dest
19072 * andl (cf-ct),dest 1:
19075 * Size 20. Size 14.
19077 * This is reasonably steep, but branch mispredict costs are
19078 * high on modern cpus, so consider failing only if optimizing
19082 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19083 && BRANCH_COST (optimize_insn_for_speed_p (),
19088 enum machine_mode cmp_mode
= GET_MODE (op0
);
19093 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19095 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19097 /* We may be reversing unordered compare to normal compare,
19098 that is not valid in general (we may convert non-trapping
19099 condition to trapping one), however on i386 we currently
19100 emit all comparisons unordered. */
19101 code
= reverse_condition_maybe_unordered (code
);
19105 code
= reverse_condition (code
);
19106 if (compare_code
!= UNKNOWN
)
19107 compare_code
= reverse_condition (compare_code
);
19111 if (compare_code
!= UNKNOWN
)
19113 /* notl op1 (if needed)
19118 For x < 0 (resp. x <= -1) there will be no notl,
19119 so if possible swap the constants to get rid of the
19121 True/false will be -1/0 while code below (store flag
19122 followed by decrement) is 0/-1, so the constants need
19123 to be exchanged once more. */
19125 if (compare_code
== GE
|| !cf
)
19127 code
= reverse_condition (code
);
19132 HOST_WIDE_INT tmp
= cf
;
19137 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19141 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19143 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19145 copy_rtx (out
), 1, OPTAB_DIRECT
);
19148 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19149 gen_int_mode (cf
- ct
, mode
),
19150 copy_rtx (out
), 1, OPTAB_DIRECT
);
19152 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19153 copy_rtx (out
), 1, OPTAB_DIRECT
);
19154 if (!rtx_equal_p (out
, operands
[0]))
19155 emit_move_insn (operands
[0], copy_rtx (out
));
19161 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19163 /* Try a few things more with specific constants and a variable. */
19166 rtx var
, orig_out
, out
, tmp
;
19168 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19171 /* If one of the two operands is an interesting constant, load a
19172 constant with the above and mask it in with a logical operation. */
19174 if (CONST_INT_P (operands
[2]))
19177 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19178 operands
[3] = constm1_rtx
, op
= and_optab
;
19179 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19180 operands
[3] = const0_rtx
, op
= ior_optab
;
19184 else if (CONST_INT_P (operands
[3]))
19187 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19188 operands
[2] = constm1_rtx
, op
= and_optab
;
19189 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19190 operands
[2] = const0_rtx
, op
= ior_optab
;
19197 orig_out
= operands
[0];
19198 tmp
= gen_reg_rtx (mode
);
19201 /* Recurse to get the constant loaded. */
19202 if (ix86_expand_int_movcc (operands
) == 0)
19205 /* Mask in the interesting variable. */
19206 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19208 if (!rtx_equal_p (out
, orig_out
))
19209 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19215 * For comparison with above,
19225 if (! nonimmediate_operand (operands
[2], mode
))
19226 operands
[2] = force_reg (mode
, operands
[2]);
19227 if (! nonimmediate_operand (operands
[3], mode
))
19228 operands
[3] = force_reg (mode
, operands
[3]);
19230 if (! register_operand (operands
[2], VOIDmode
)
19232 || ! register_operand (operands
[3], VOIDmode
)))
19233 operands
[2] = force_reg (mode
, operands
[2]);
19236 && ! register_operand (operands
[3], VOIDmode
))
19237 operands
[3] = force_reg (mode
, operands
[3]);
19239 emit_insn (compare_seq
);
19240 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19241 gen_rtx_IF_THEN_ELSE (mode
,
19242 compare_op
, operands
[2],
19247 /* Swap, force into registers, or otherwise massage the two operands
19248 to an sse comparison with a mask result. Thus we differ a bit from
19249 ix86_prepare_fp_compare_args which expects to produce a flags result.
19251 The DEST operand exists to help determine whether to commute commutative
19252 operators. The POP0/POP1 operands are updated in place. The new
19253 comparison code is returned, or UNKNOWN if not implementable. */
19255 static enum rtx_code
19256 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19257 rtx
*pop0
, rtx
*pop1
)
19265 /* AVX supports all the needed comparisons. */
19268 /* We have no LTGT as an operator. We could implement it with
19269 NE & ORDERED, but this requires an extra temporary. It's
19270 not clear that it's worth it. */
19277 /* These are supported directly. */
19284 /* AVX has 3 operand comparisons, no need to swap anything. */
19287 /* For commutative operators, try to canonicalize the destination
19288 operand to be first in the comparison - this helps reload to
19289 avoid extra moves. */
19290 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19298 /* These are not supported directly before AVX, and furthermore
19299 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19300 comparison operands to transform into something that is
19305 code
= swap_condition (code
);
19309 gcc_unreachable ();
19315 /* Detect conditional moves that exactly match min/max operational
19316 semantics. Note that this is IEEE safe, as long as we don't
19317 interchange the operands.
19319 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19320 and TRUE if the operation is successful and instructions are emitted. */
19323 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19324 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19326 enum machine_mode mode
;
19332 else if (code
== UNGE
)
19335 if_true
= if_false
;
19341 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19343 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19348 mode
= GET_MODE (dest
);
19350 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19351 but MODE may be a vector mode and thus not appropriate. */
19352 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19354 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19357 if_true
= force_reg (mode
, if_true
);
19358 v
= gen_rtvec (2, if_true
, if_false
);
19359 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19363 code
= is_min
? SMIN
: SMAX
;
19364 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19367 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19371 /* Expand an sse vector comparison. Return the register with the result. */
19374 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19375 rtx op_true
, rtx op_false
)
19377 enum machine_mode mode
= GET_MODE (dest
);
19378 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19381 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19382 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19383 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19386 || reg_overlap_mentioned_p (dest
, op_true
)
19387 || reg_overlap_mentioned_p (dest
, op_false
))
19388 dest
= gen_reg_rtx (mode
);
19390 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19391 if (cmp_mode
!= mode
)
19393 x
= force_reg (cmp_mode
, x
);
19394 convert_move (dest
, x
, false);
19397 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19402 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19403 operations. This is used for both scalar and vector conditional moves. */
19406 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19408 enum machine_mode mode
= GET_MODE (dest
);
19411 if (vector_all_ones_operand (op_true
, mode
)
19412 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19414 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19416 else if (op_false
== CONST0_RTX (mode
))
19418 op_true
= force_reg (mode
, op_true
);
19419 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19420 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19422 else if (op_true
== CONST0_RTX (mode
))
19424 op_false
= force_reg (mode
, op_false
);
19425 x
= gen_rtx_NOT (mode
, cmp
);
19426 x
= gen_rtx_AND (mode
, x
, op_false
);
19427 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19429 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19431 op_false
= force_reg (mode
, op_false
);
19432 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19433 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19435 else if (TARGET_XOP
)
19437 op_true
= force_reg (mode
, op_true
);
19439 if (!nonimmediate_operand (op_false
, mode
))
19440 op_false
= force_reg (mode
, op_false
);
19442 emit_insn (gen_rtx_SET (mode
, dest
,
19443 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19449 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19451 if (!nonimmediate_operand (op_true
, mode
))
19452 op_true
= force_reg (mode
, op_true
);
19454 op_false
= force_reg (mode
, op_false
);
19460 gen
= gen_sse4_1_blendvps
;
19464 gen
= gen_sse4_1_blendvpd
;
19472 gen
= gen_sse4_1_pblendvb
;
19473 dest
= gen_lowpart (V16QImode
, dest
);
19474 op_false
= gen_lowpart (V16QImode
, op_false
);
19475 op_true
= gen_lowpart (V16QImode
, op_true
);
19476 cmp
= gen_lowpart (V16QImode
, cmp
);
19481 gen
= gen_avx_blendvps256
;
19485 gen
= gen_avx_blendvpd256
;
19493 gen
= gen_avx2_pblendvb
;
19494 dest
= gen_lowpart (V32QImode
, dest
);
19495 op_false
= gen_lowpart (V32QImode
, op_false
);
19496 op_true
= gen_lowpart (V32QImode
, op_true
);
19497 cmp
= gen_lowpart (V32QImode
, cmp
);
19505 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
19508 op_true
= force_reg (mode
, op_true
);
19510 t2
= gen_reg_rtx (mode
);
19512 t3
= gen_reg_rtx (mode
);
19516 x
= gen_rtx_AND (mode
, op_true
, cmp
);
19517 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
19519 x
= gen_rtx_NOT (mode
, cmp
);
19520 x
= gen_rtx_AND (mode
, x
, op_false
);
19521 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
19523 x
= gen_rtx_IOR (mode
, t3
, t2
);
19524 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19529 /* Expand a floating-point conditional move. Return true if successful. */
19532 ix86_expand_fp_movcc (rtx operands
[])
19534 enum machine_mode mode
= GET_MODE (operands
[0]);
19535 enum rtx_code code
= GET_CODE (operands
[1]);
19536 rtx tmp
, compare_op
;
19537 rtx op0
= XEXP (operands
[1], 0);
19538 rtx op1
= XEXP (operands
[1], 1);
19540 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
19542 enum machine_mode cmode
;
19544 /* Since we've no cmove for sse registers, don't force bad register
19545 allocation just to gain access to it. Deny movcc when the
19546 comparison mode doesn't match the move mode. */
19547 cmode
= GET_MODE (op0
);
19548 if (cmode
== VOIDmode
)
19549 cmode
= GET_MODE (op1
);
19553 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
19554 if (code
== UNKNOWN
)
19557 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
19558 operands
[2], operands
[3]))
19561 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
19562 operands
[2], operands
[3]);
19563 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
19567 /* The floating point conditional move instructions don't directly
19568 support conditions resulting from a signed integer comparison. */
19570 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19571 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
19573 tmp
= gen_reg_rtx (QImode
);
19574 ix86_expand_setcc (tmp
, code
, op0
, op1
);
19576 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
19579 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19580 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
19581 operands
[2], operands
[3])));
19586 /* Expand a floating-point vector conditional move; a vcond operation
19587 rather than a movcc operation. */
19590 ix86_expand_fp_vcond (rtx operands
[])
19592 enum rtx_code code
= GET_CODE (operands
[3]);
19595 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
19596 &operands
[4], &operands
[5]);
19597 if (code
== UNKNOWN
)
19600 switch (GET_CODE (operands
[3]))
19603 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
19604 operands
[5], operands
[0], operands
[0]);
19605 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
19606 operands
[5], operands
[1], operands
[2]);
19610 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
19611 operands
[5], operands
[0], operands
[0]);
19612 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
19613 operands
[5], operands
[1], operands
[2]);
19617 gcc_unreachable ();
19619 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
19621 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19625 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
19626 operands
[5], operands
[1], operands
[2]))
19629 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
19630 operands
[1], operands
[2]);
19631 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19635 /* Expand a signed/unsigned integral vector conditional move. */
19638 ix86_expand_int_vcond (rtx operands
[])
19640 enum machine_mode data_mode
= GET_MODE (operands
[0]);
19641 enum machine_mode mode
= GET_MODE (operands
[4]);
19642 enum rtx_code code
= GET_CODE (operands
[3]);
19643 bool negate
= false;
19646 cop0
= operands
[4];
19647 cop1
= operands
[5];
19649 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19650 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
19651 if ((code
== LT
|| code
== GE
)
19652 && data_mode
== mode
19653 && cop1
== CONST0_RTX (mode
)
19654 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
19655 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
19656 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
19657 && (GET_MODE_SIZE (data_mode
) == 16
19658 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
19660 rtx negop
= operands
[2 - (code
== LT
)];
19661 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
19662 if (negop
== CONST1_RTX (data_mode
))
19664 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
19665 operands
[0], 1, OPTAB_DIRECT
);
19666 if (res
!= operands
[0])
19667 emit_move_insn (operands
[0], res
);
19670 else if (GET_MODE_INNER (data_mode
) != DImode
19671 && vector_all_ones_operand (negop
, data_mode
))
19673 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
19674 operands
[0], 0, OPTAB_DIRECT
);
19675 if (res
!= operands
[0])
19676 emit_move_insn (operands
[0], res
);
19681 if (!nonimmediate_operand (cop1
, mode
))
19682 cop1
= force_reg (mode
, cop1
);
19683 if (!general_operand (operands
[1], data_mode
))
19684 operands
[1] = force_reg (data_mode
, operands
[1]);
19685 if (!general_operand (operands
[2], data_mode
))
19686 operands
[2] = force_reg (data_mode
, operands
[2]);
19688 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19690 && (mode
== V16QImode
|| mode
== V8HImode
19691 || mode
== V4SImode
|| mode
== V2DImode
))
19695 /* Canonicalize the comparison to EQ, GT, GTU. */
19706 code
= reverse_condition (code
);
19712 code
= reverse_condition (code
);
19718 code
= swap_condition (code
);
19719 x
= cop0
, cop0
= cop1
, cop1
= x
;
19723 gcc_unreachable ();
19726 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19727 if (mode
== V2DImode
)
19732 /* SSE4.1 supports EQ. */
19733 if (!TARGET_SSE4_1
)
19739 /* SSE4.2 supports GT/GTU. */
19740 if (!TARGET_SSE4_2
)
19745 gcc_unreachable ();
19749 /* Unsigned parallel compare is not supported by the hardware.
19750 Play some tricks to turn this into a signed comparison
19754 cop0
= force_reg (mode
, cop0
);
19764 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
19768 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
19769 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
19770 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
19771 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
19773 gcc_unreachable ();
19775 /* Subtract (-(INT MAX) - 1) from both operands to make
19777 mask
= ix86_build_signbit_mask (mode
, true, false);
19778 t1
= gen_reg_rtx (mode
);
19779 emit_insn (gen_sub3 (t1
, cop0
, mask
));
19781 t2
= gen_reg_rtx (mode
);
19782 emit_insn (gen_sub3 (t2
, cop1
, mask
));
19794 /* Perform a parallel unsigned saturating subtraction. */
19795 x
= gen_reg_rtx (mode
);
19796 emit_insn (gen_rtx_SET (VOIDmode
, x
,
19797 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
19800 cop1
= CONST0_RTX (mode
);
19806 gcc_unreachable ();
19811 /* Allow the comparison to be done in one mode, but the movcc to
19812 happen in another mode. */
19813 if (data_mode
== mode
)
19815 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
19816 operands
[1+negate
], operands
[2-negate
]);
19820 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
19821 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
19823 operands
[1+negate
], operands
[2-negate
]);
19824 x
= gen_lowpart (data_mode
, x
);
19827 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
19828 operands
[2-negate
]);
19832 /* Expand a variable vector permutation. */
19835 ix86_expand_vec_perm (rtx operands
[])
19837 rtx target
= operands
[0];
19838 rtx op0
= operands
[1];
19839 rtx op1
= operands
[2];
19840 rtx mask
= operands
[3];
19841 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
19842 enum machine_mode mode
= GET_MODE (op0
);
19843 enum machine_mode maskmode
= GET_MODE (mask
);
19845 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
19847 /* Number of elements in the vector. */
19848 w
= GET_MODE_NUNITS (mode
);
19849 e
= GET_MODE_UNIT_SIZE (mode
);
19850 gcc_assert (w
<= 32);
19854 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
19856 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19857 an constant shuffle operand. With a tiny bit of effort we can
19858 use VPERMD instead. A re-interpretation stall for V4DFmode is
19859 unfortunate but there's no avoiding it.
19860 Similarly for V16HImode we don't have instructions for variable
19861 shuffling, while for V32QImode we can use after preparing suitable
19862 masks vpshufb; vpshufb; vpermq; vpor. */
19864 if (mode
== V16HImode
)
19866 maskmode
= mode
= V32QImode
;
19872 maskmode
= mode
= V8SImode
;
19876 t1
= gen_reg_rtx (maskmode
);
19878 /* Replicate the low bits of the V4DImode mask into V8SImode:
19880 t1 = { A A B B C C D D }. */
19881 for (i
= 0; i
< w
/ 2; ++i
)
19882 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
19883 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19884 vt
= force_reg (maskmode
, vt
);
19885 mask
= gen_lowpart (maskmode
, mask
);
19886 if (maskmode
== V8SImode
)
19887 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
19889 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
19891 /* Multiply the shuffle indicies by two. */
19892 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
19895 /* Add one to the odd shuffle indicies:
19896 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19897 for (i
= 0; i
< w
/ 2; ++i
)
19899 vec
[i
* 2] = const0_rtx
;
19900 vec
[i
* 2 + 1] = const1_rtx
;
19902 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19903 vt
= force_const_mem (maskmode
, vt
);
19904 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
19907 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19908 operands
[3] = mask
= t1
;
19909 target
= gen_lowpart (mode
, target
);
19910 op0
= gen_lowpart (mode
, op0
);
19911 op1
= gen_lowpart (mode
, op1
);
19917 /* The VPERMD and VPERMPS instructions already properly ignore
19918 the high bits of the shuffle elements. No need for us to
19919 perform an AND ourselves. */
19920 if (one_operand_shuffle
)
19921 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
19924 t1
= gen_reg_rtx (V8SImode
);
19925 t2
= gen_reg_rtx (V8SImode
);
19926 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
19927 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
19933 mask
= gen_lowpart (V8SFmode
, mask
);
19934 if (one_operand_shuffle
)
19935 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
19938 t1
= gen_reg_rtx (V8SFmode
);
19939 t2
= gen_reg_rtx (V8SFmode
);
19940 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
19941 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
19947 /* By combining the two 128-bit input vectors into one 256-bit
19948 input vector, we can use VPERMD and VPERMPS for the full
19949 two-operand shuffle. */
19950 t1
= gen_reg_rtx (V8SImode
);
19951 t2
= gen_reg_rtx (V8SImode
);
19952 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
19953 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
19954 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
19955 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
19959 t1
= gen_reg_rtx (V8SFmode
);
19960 t2
= gen_reg_rtx (V8SImode
);
19961 mask
= gen_lowpart (V4SImode
, mask
);
19962 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
19963 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
19964 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
19965 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
19969 t1
= gen_reg_rtx (V32QImode
);
19970 t2
= gen_reg_rtx (V32QImode
);
19971 t3
= gen_reg_rtx (V32QImode
);
19972 vt2
= GEN_INT (128);
19973 for (i
= 0; i
< 32; i
++)
19975 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
19976 vt
= force_reg (V32QImode
, vt
);
19977 for (i
= 0; i
< 32; i
++)
19978 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
19979 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
19980 vt2
= force_reg (V32QImode
, vt2
);
19981 /* From mask create two adjusted masks, which contain the same
19982 bits as mask in the low 7 bits of each vector element.
19983 The first mask will have the most significant bit clear
19984 if it requests element from the same 128-bit lane
19985 and MSB set if it requests element from the other 128-bit lane.
19986 The second mask will have the opposite values of the MSB,
19987 and additionally will have its 128-bit lanes swapped.
19988 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19989 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19990 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19991 stands for other 12 bytes. */
19992 /* The bit whether element is from the same lane or the other
19993 lane is bit 4, so shift it up by 3 to the MSB position. */
19994 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
19995 gen_lowpart (V4DImode
, mask
),
19997 /* Clear MSB bits from the mask just in case it had them set. */
19998 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
19999 /* After this t1 will have MSB set for elements from other lane. */
20000 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20001 /* Clear bits other than MSB. */
20002 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20003 /* Or in the lower bits from mask into t3. */
20004 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20005 /* And invert MSB bits in t1, so MSB is set for elements from the same
20007 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20008 /* Swap 128-bit lanes in t3. */
20009 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20010 gen_lowpart (V4DImode
, t3
),
20011 const2_rtx
, GEN_INT (3),
20012 const0_rtx
, const1_rtx
));
20013 /* And or in the lower bits from mask into t1. */
20014 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20015 if (one_operand_shuffle
)
20017 /* Each of these shuffles will put 0s in places where
20018 element from the other 128-bit lane is needed, otherwise
20019 will shuffle in the requested value. */
20020 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20021 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20022 /* For t3 the 128-bit lanes are swapped again. */
20023 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20024 gen_lowpart (V4DImode
, t3
),
20025 const2_rtx
, GEN_INT (3),
20026 const0_rtx
, const1_rtx
));
20027 /* And oring both together leads to the result. */
20028 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20032 t4
= gen_reg_rtx (V32QImode
);
20033 /* Similarly to the above one_operand_shuffle code,
20034 just for repeated twice for each operand. merge_two:
20035 code will merge the two results together. */
20036 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20037 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20038 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20039 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20040 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20041 gen_lowpart (V4DImode
, t4
),
20042 const2_rtx
, GEN_INT (3),
20043 const0_rtx
, const1_rtx
));
20044 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20045 gen_lowpart (V4DImode
, t3
),
20046 const2_rtx
, GEN_INT (3),
20047 const0_rtx
, const1_rtx
));
20048 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20049 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20055 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20062 /* The XOP VPPERM insn supports three inputs. By ignoring the
20063 one_operand_shuffle special case, we avoid creating another
20064 set of constant vectors in memory. */
20065 one_operand_shuffle
= false;
20067 /* mask = mask & {2*w-1, ...} */
20068 vt
= GEN_INT (2*w
- 1);
20072 /* mask = mask & {w-1, ...} */
20073 vt
= GEN_INT (w
- 1);
20076 for (i
= 0; i
< w
; i
++)
20078 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20079 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20080 NULL_RTX
, 0, OPTAB_DIRECT
);
20082 /* For non-QImode operations, convert the word permutation control
20083 into a byte permutation control. */
20084 if (mode
!= V16QImode
)
20086 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20087 GEN_INT (exact_log2 (e
)),
20088 NULL_RTX
, 0, OPTAB_DIRECT
);
20090 /* Convert mask to vector of chars. */
20091 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20093 /* Replicate each of the input bytes into byte positions:
20094 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20095 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20096 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20097 for (i
= 0; i
< 16; ++i
)
20098 vec
[i
] = GEN_INT (i
/e
* e
);
20099 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20100 vt
= force_const_mem (V16QImode
, vt
);
20102 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20104 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20106 /* Convert it into the byte positions by doing
20107 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20108 for (i
= 0; i
< 16; ++i
)
20109 vec
[i
] = GEN_INT (i
% e
);
20110 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20111 vt
= force_const_mem (V16QImode
, vt
);
20112 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20115 /* The actual shuffle operations all operate on V16QImode. */
20116 op0
= gen_lowpart (V16QImode
, op0
);
20117 op1
= gen_lowpart (V16QImode
, op1
);
20118 target
= gen_lowpart (V16QImode
, target
);
20122 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20124 else if (one_operand_shuffle
)
20126 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20133 /* Shuffle the two input vectors independently. */
20134 t1
= gen_reg_rtx (V16QImode
);
20135 t2
= gen_reg_rtx (V16QImode
);
20136 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20137 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20140 /* Then merge them together. The key is whether any given control
20141 element contained a bit set that indicates the second word. */
20142 mask
= operands
[3];
20144 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20146 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20147 more shuffle to convert the V2DI input mask into a V4SI
20148 input mask. At which point the masking that expand_int_vcond
20149 will work as desired. */
20150 rtx t3
= gen_reg_rtx (V4SImode
);
20151 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20152 const0_rtx
, const0_rtx
,
20153 const2_rtx
, const2_rtx
));
20155 maskmode
= V4SImode
;
20159 for (i
= 0; i
< w
; i
++)
20161 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20162 vt
= force_reg (maskmode
, vt
);
20163 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20164 NULL_RTX
, 0, OPTAB_DIRECT
);
20166 xops
[0] = gen_lowpart (mode
, operands
[0]);
20167 xops
[1] = gen_lowpart (mode
, t2
);
20168 xops
[2] = gen_lowpart (mode
, t1
);
20169 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20172 ok
= ix86_expand_int_vcond (xops
);
20177 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20178 true if we should do zero extension, else sign extension. HIGH_P is
20179 true if we want the N/2 high elements, else the low elements. */
20182 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
20184 enum machine_mode imode
= GET_MODE (operands
[1]);
20189 rtx (*unpack
)(rtx
, rtx
);
20190 rtx (*extract
)(rtx
, rtx
) = NULL
;
20191 enum machine_mode halfmode
= BLKmode
;
20197 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20199 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20200 halfmode
= V16QImode
;
20202 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20206 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20208 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20209 halfmode
= V8HImode
;
20211 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20215 unpack
= gen_avx2_zero_extendv4siv4di2
;
20217 unpack
= gen_avx2_sign_extendv4siv4di2
;
20218 halfmode
= V4SImode
;
20220 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20224 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20226 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20230 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20232 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20236 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20238 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20241 gcc_unreachable ();
20244 if (GET_MODE_SIZE (imode
) == 32)
20246 tmp
= gen_reg_rtx (halfmode
);
20247 emit_insn (extract (tmp
, operands
[1]));
20251 /* Shift higher 8 bytes to lower 8 bytes. */
20252 tmp
= gen_reg_rtx (imode
);
20253 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20254 gen_lowpart (V1TImode
, operands
[1]),
20260 emit_insn (unpack (operands
[0], tmp
));
20264 rtx (*unpack
)(rtx
, rtx
, rtx
);
20270 unpack
= gen_vec_interleave_highv16qi
;
20272 unpack
= gen_vec_interleave_lowv16qi
;
20276 unpack
= gen_vec_interleave_highv8hi
;
20278 unpack
= gen_vec_interleave_lowv8hi
;
20282 unpack
= gen_vec_interleave_highv4si
;
20284 unpack
= gen_vec_interleave_lowv4si
;
20287 gcc_unreachable ();
20290 dest
= gen_lowpart (imode
, operands
[0]);
20293 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20295 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20296 operands
[1], pc_rtx
, pc_rtx
);
20298 emit_insn (unpack (dest
, operands
[1], tmp
));
20302 /* Expand conditional increment or decrement using adb/sbb instructions.
20303 The default case using setcc followed by the conditional move can be
20304 done by generic code. */
20306 ix86_expand_int_addcc (rtx operands
[])
20308 enum rtx_code code
= GET_CODE (operands
[1]);
20310 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20312 rtx val
= const0_rtx
;
20313 bool fpcmp
= false;
20314 enum machine_mode mode
;
20315 rtx op0
= XEXP (operands
[1], 0);
20316 rtx op1
= XEXP (operands
[1], 1);
20318 if (operands
[3] != const1_rtx
20319 && operands
[3] != constm1_rtx
)
20321 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20323 code
= GET_CODE (compare_op
);
20325 flags
= XEXP (compare_op
, 0);
20327 if (GET_MODE (flags
) == CCFPmode
20328 || GET_MODE (flags
) == CCFPUmode
)
20331 code
= ix86_fp_compare_code_to_integer (code
);
20338 PUT_CODE (compare_op
,
20339 reverse_condition_maybe_unordered
20340 (GET_CODE (compare_op
)));
20342 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20345 mode
= GET_MODE (operands
[0]);
20347 /* Construct either adc or sbb insn. */
20348 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20353 insn
= gen_subqi3_carry
;
20356 insn
= gen_subhi3_carry
;
20359 insn
= gen_subsi3_carry
;
20362 insn
= gen_subdi3_carry
;
20365 gcc_unreachable ();
20373 insn
= gen_addqi3_carry
;
20376 insn
= gen_addhi3_carry
;
20379 insn
= gen_addsi3_carry
;
20382 insn
= gen_adddi3_carry
;
20385 gcc_unreachable ();
20388 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20394 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20395 but works for floating pointer parameters and nonoffsetable memories.
20396 For pushes, it returns just stack offsets; the values will be saved
20397 in the right order. Maximally three parts are generated. */
20400 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20405 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20407 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20409 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20410 gcc_assert (size
>= 2 && size
<= 4);
20412 /* Optimize constant pool reference to immediates. This is used by fp
20413 moves, that force all constants to memory to allow combining. */
20414 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20416 rtx tmp
= maybe_get_pool_constant (operand
);
20421 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20423 /* The only non-offsetable memories we handle are pushes. */
20424 int ok
= push_operand (operand
, VOIDmode
);
20428 operand
= copy_rtx (operand
);
20429 PUT_MODE (operand
, word_mode
);
20430 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20434 if (GET_CODE (operand
) == CONST_VECTOR
)
20436 enum machine_mode imode
= int_mode_for_mode (mode
);
20437 /* Caution: if we looked through a constant pool memory above,
20438 the operand may actually have a different mode now. That's
20439 ok, since we want to pun this all the way back to an integer. */
20440 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20441 gcc_assert (operand
!= NULL
);
20447 if (mode
== DImode
)
20448 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20453 if (REG_P (operand
))
20455 gcc_assert (reload_completed
);
20456 for (i
= 0; i
< size
; i
++)
20457 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20459 else if (offsettable_memref_p (operand
))
20461 operand
= adjust_address (operand
, SImode
, 0);
20462 parts
[0] = operand
;
20463 for (i
= 1; i
< size
; i
++)
20464 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20466 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20471 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20475 real_to_target (l
, &r
, mode
);
20476 parts
[3] = gen_int_mode (l
[3], SImode
);
20477 parts
[2] = gen_int_mode (l
[2], SImode
);
20480 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
20481 parts
[2] = gen_int_mode (l
[2], SImode
);
20484 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20487 gcc_unreachable ();
20489 parts
[1] = gen_int_mode (l
[1], SImode
);
20490 parts
[0] = gen_int_mode (l
[0], SImode
);
20493 gcc_unreachable ();
20498 if (mode
== TImode
)
20499 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20500 if (mode
== XFmode
|| mode
== TFmode
)
20502 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20503 if (REG_P (operand
))
20505 gcc_assert (reload_completed
);
20506 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20507 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20509 else if (offsettable_memref_p (operand
))
20511 operand
= adjust_address (operand
, DImode
, 0);
20512 parts
[0] = operand
;
20513 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20515 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20520 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20521 real_to_target (l
, &r
, mode
);
20523 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20524 if (HOST_BITS_PER_WIDE_INT
>= 64)
20527 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20528 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
20531 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
20533 if (upper_mode
== SImode
)
20534 parts
[1] = gen_int_mode (l
[2], SImode
);
20535 else if (HOST_BITS_PER_WIDE_INT
>= 64)
20538 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20539 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
20542 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
20545 gcc_unreachable ();
20552 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20553 Return false when normal moves are needed; true when all required
20554 insns have been emitted. Operands 2-4 contain the input values
20555 int the correct order; operands 5-7 contain the output values. */
20558 ix86_split_long_move (rtx operands
[])
20563 int collisions
= 0;
20564 enum machine_mode mode
= GET_MODE (operands
[0]);
20565 bool collisionparts
[4];
20567 /* The DFmode expanders may ask us to move double.
20568 For 64bit target this is single move. By hiding the fact
20569 here we simplify i386.md splitters. */
20570 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
20572 /* Optimize constant pool reference to immediates. This is used by
20573 fp moves, that force all constants to memory to allow combining. */
20575 if (MEM_P (operands
[1])
20576 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
20577 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
20578 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
20579 if (push_operand (operands
[0], VOIDmode
))
20581 operands
[0] = copy_rtx (operands
[0]);
20582 PUT_MODE (operands
[0], word_mode
);
20585 operands
[0] = gen_lowpart (DImode
, operands
[0]);
20586 operands
[1] = gen_lowpart (DImode
, operands
[1]);
20587 emit_move_insn (operands
[0], operands
[1]);
20591 /* The only non-offsettable memory we handle is push. */
20592 if (push_operand (operands
[0], VOIDmode
))
20595 gcc_assert (!MEM_P (operands
[0])
20596 || offsettable_memref_p (operands
[0]));
20598 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
20599 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
20601 /* When emitting push, take care for source operands on the stack. */
20602 if (push
&& MEM_P (operands
[1])
20603 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
20605 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
20607 /* Compensate for the stack decrement by 4. */
20608 if (!TARGET_64BIT
&& nparts
== 3
20609 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
20610 src_base
= plus_constant (Pmode
, src_base
, 4);
20612 /* src_base refers to the stack pointer and is
20613 automatically decreased by emitted push. */
20614 for (i
= 0; i
< nparts
; i
++)
20615 part
[1][i
] = change_address (part
[1][i
],
20616 GET_MODE (part
[1][i
]), src_base
);
20619 /* We need to do copy in the right order in case an address register
20620 of the source overlaps the destination. */
20621 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
20625 for (i
= 0; i
< nparts
; i
++)
20628 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
20629 if (collisionparts
[i
])
20633 /* Collision in the middle part can be handled by reordering. */
20634 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
20636 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20637 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20639 else if (collisions
== 1
20641 && (collisionparts
[1] || collisionparts
[2]))
20643 if (collisionparts
[1])
20645 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20646 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20650 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
20651 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
20655 /* If there are more collisions, we can't handle it by reordering.
20656 Do an lea to the last part and use only one colliding move. */
20657 else if (collisions
> 1)
20663 base
= part
[0][nparts
- 1];
20665 /* Handle the case when the last part isn't valid for lea.
20666 Happens in 64-bit mode storing the 12-byte XFmode. */
20667 if (GET_MODE (base
) != Pmode
)
20668 base
= gen_rtx_REG (Pmode
, REGNO (base
));
20670 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
20671 part
[1][0] = replace_equiv_address (part
[1][0], base
);
20672 for (i
= 1; i
< nparts
; i
++)
20674 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
20675 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
20686 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
20687 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
20688 stack_pointer_rtx
, GEN_INT (-4)));
20689 emit_move_insn (part
[0][2], part
[1][2]);
20691 else if (nparts
== 4)
20693 emit_move_insn (part
[0][3], part
[1][3]);
20694 emit_move_insn (part
[0][2], part
[1][2]);
20699 /* In 64bit mode we don't have 32bit push available. In case this is
20700 register, it is OK - we will just use larger counterpart. We also
20701 retype memory - these comes from attempt to avoid REX prefix on
20702 moving of second half of TFmode value. */
20703 if (GET_MODE (part
[1][1]) == SImode
)
20705 switch (GET_CODE (part
[1][1]))
20708 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
20712 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
20716 gcc_unreachable ();
20719 if (GET_MODE (part
[1][0]) == SImode
)
20720 part
[1][0] = part
[1][1];
20723 emit_move_insn (part
[0][1], part
[1][1]);
20724 emit_move_insn (part
[0][0], part
[1][0]);
20728 /* Choose correct order to not overwrite the source before it is copied. */
20729 if ((REG_P (part
[0][0])
20730 && REG_P (part
[1][1])
20731 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
20733 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
20735 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
20737 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
20739 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
20741 operands
[2 + i
] = part
[0][j
];
20742 operands
[6 + i
] = part
[1][j
];
20747 for (i
= 0; i
< nparts
; i
++)
20749 operands
[2 + i
] = part
[0][i
];
20750 operands
[6 + i
] = part
[1][i
];
20754 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20755 if (optimize_insn_for_size_p ())
20757 for (j
= 0; j
< nparts
- 1; j
++)
20758 if (CONST_INT_P (operands
[6 + j
])
20759 && operands
[6 + j
] != const0_rtx
20760 && REG_P (operands
[2 + j
]))
20761 for (i
= j
; i
< nparts
- 1; i
++)
20762 if (CONST_INT_P (operands
[7 + i
])
20763 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
20764 operands
[7 + i
] = operands
[2 + j
];
20767 for (i
= 0; i
< nparts
; i
++)
20768 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
20773 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20774 left shift by a constant, either using a single shift or
20775 a sequence of add instructions. */
20778 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
20780 rtx (*insn
)(rtx
, rtx
, rtx
);
20783 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
20784 && !optimize_insn_for_size_p ()))
20786 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
20787 while (count
-- > 0)
20788 emit_insn (insn (operand
, operand
, operand
));
20792 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
20793 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
20798 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20800 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
20801 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
20802 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20804 rtx low
[2], high
[2];
20807 if (CONST_INT_P (operands
[2]))
20809 split_double_mode (mode
, operands
, 2, low
, high
);
20810 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20812 if (count
>= half_width
)
20814 emit_move_insn (high
[0], low
[1]);
20815 emit_move_insn (low
[0], const0_rtx
);
20817 if (count
> half_width
)
20818 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
20822 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
20824 if (!rtx_equal_p (operands
[0], operands
[1]))
20825 emit_move_insn (operands
[0], operands
[1]);
20827 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
20828 ix86_expand_ashl_const (low
[0], count
, mode
);
20833 split_double_mode (mode
, operands
, 1, low
, high
);
20835 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
20837 if (operands
[1] == const1_rtx
)
20839 /* Assuming we've chosen a QImode capable registers, then 1 << N
20840 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20841 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
20843 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
20845 ix86_expand_clear (low
[0]);
20846 ix86_expand_clear (high
[0]);
20847 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
20849 d
= gen_lowpart (QImode
, low
[0]);
20850 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
20851 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
20852 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
20854 d
= gen_lowpart (QImode
, high
[0]);
20855 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
20856 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
20857 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
20860 /* Otherwise, we can get the same results by manually performing
20861 a bit extract operation on bit 5/6, and then performing the two
20862 shifts. The two methods of getting 0/1 into low/high are exactly
20863 the same size. Avoiding the shift in the bit extract case helps
20864 pentium4 a bit; no one else seems to care much either way. */
20867 enum machine_mode half_mode
;
20868 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
20869 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
20870 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
20871 HOST_WIDE_INT bits
;
20874 if (mode
== DImode
)
20876 half_mode
= SImode
;
20877 gen_lshr3
= gen_lshrsi3
;
20878 gen_and3
= gen_andsi3
;
20879 gen_xor3
= gen_xorsi3
;
20884 half_mode
= DImode
;
20885 gen_lshr3
= gen_lshrdi3
;
20886 gen_and3
= gen_anddi3
;
20887 gen_xor3
= gen_xordi3
;
20891 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
20892 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
20894 x
= gen_lowpart (half_mode
, operands
[2]);
20895 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
20897 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
20898 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
20899 emit_move_insn (low
[0], high
[0]);
20900 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
20903 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
20904 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
20908 if (operands
[1] == constm1_rtx
)
20910 /* For -1 << N, we can avoid the shld instruction, because we
20911 know that we're shifting 0...31/63 ones into a -1. */
20912 emit_move_insn (low
[0], constm1_rtx
);
20913 if (optimize_insn_for_size_p ())
20914 emit_move_insn (high
[0], low
[0]);
20916 emit_move_insn (high
[0], constm1_rtx
);
20920 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
20922 if (!rtx_equal_p (operands
[0], operands
[1]))
20923 emit_move_insn (operands
[0], operands
[1]);
20925 split_double_mode (mode
, operands
, 1, low
, high
);
20926 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
20929 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
20931 if (TARGET_CMOVE
&& scratch
)
20933 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
20934 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
20936 ix86_expand_clear (scratch
);
20937 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
20941 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
20942 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
20944 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
20949 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20951 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
20952 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
20953 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
20954 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20956 rtx low
[2], high
[2];
20959 if (CONST_INT_P (operands
[2]))
20961 split_double_mode (mode
, operands
, 2, low
, high
);
20962 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20964 if (count
== GET_MODE_BITSIZE (mode
) - 1)
20966 emit_move_insn (high
[0], high
[1]);
20967 emit_insn (gen_ashr3 (high
[0], high
[0],
20968 GEN_INT (half_width
- 1)));
20969 emit_move_insn (low
[0], high
[0]);
20972 else if (count
>= half_width
)
20974 emit_move_insn (low
[0], high
[1]);
20975 emit_move_insn (high
[0], low
[0]);
20976 emit_insn (gen_ashr3 (high
[0], high
[0],
20977 GEN_INT (half_width
- 1)));
20979 if (count
> half_width
)
20980 emit_insn (gen_ashr3 (low
[0], low
[0],
20981 GEN_INT (count
- half_width
)));
20985 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20987 if (!rtx_equal_p (operands
[0], operands
[1]))
20988 emit_move_insn (operands
[0], operands
[1]);
20990 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
20991 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
20996 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20998 if (!rtx_equal_p (operands
[0], operands
[1]))
20999 emit_move_insn (operands
[0], operands
[1]);
21001 split_double_mode (mode
, operands
, 1, low
, high
);
21003 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21004 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21006 if (TARGET_CMOVE
&& scratch
)
21008 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21009 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21011 emit_move_insn (scratch
, high
[0]);
21012 emit_insn (gen_ashr3 (scratch
, scratch
,
21013 GEN_INT (half_width
- 1)));
21014 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21019 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21020 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21022 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21028 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21030 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21031 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21032 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21033 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21035 rtx low
[2], high
[2];
21038 if (CONST_INT_P (operands
[2]))
21040 split_double_mode (mode
, operands
, 2, low
, high
);
21041 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21043 if (count
>= half_width
)
21045 emit_move_insn (low
[0], high
[1]);
21046 ix86_expand_clear (high
[0]);
21048 if (count
> half_width
)
21049 emit_insn (gen_lshr3 (low
[0], low
[0],
21050 GEN_INT (count
- half_width
)));
21054 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21056 if (!rtx_equal_p (operands
[0], operands
[1]))
21057 emit_move_insn (operands
[0], operands
[1]);
21059 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21060 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21065 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21067 if (!rtx_equal_p (operands
[0], operands
[1]))
21068 emit_move_insn (operands
[0], operands
[1]);
21070 split_double_mode (mode
, operands
, 1, low
, high
);
21072 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21073 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21075 if (TARGET_CMOVE
&& scratch
)
21077 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21078 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21080 ix86_expand_clear (scratch
);
21081 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21086 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21087 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21089 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21094 /* Predict just emitted jump instruction to be taken with probability PROB. */
21096 predict_jump (int prob
)
21098 rtx insn
= get_last_insn ();
21099 gcc_assert (JUMP_P (insn
));
21100 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21103 /* Helper function for the string operations below. Dest VARIABLE whether
21104 it is aligned to VALUE bytes. If true, jump to the label. */
21106 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21108 rtx label
= gen_label_rtx ();
21109 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21110 if (GET_MODE (variable
) == DImode
)
21111 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21113 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21114 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21117 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21119 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21123 /* Adjust COUNTER by the VALUE. */
21125 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21127 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21128 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21130 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21133 /* Zero extend possibly SImode EXP to Pmode register. */
21135 ix86_zero_extend_to_Pmode (rtx exp
)
21137 if (GET_MODE (exp
) != Pmode
)
21138 exp
= convert_to_mode (Pmode
, exp
, 1);
21139 return force_reg (Pmode
, exp
);
21142 /* Divide COUNTREG by SCALE. */
21144 scale_counter (rtx countreg
, int scale
)
21150 if (CONST_INT_P (countreg
))
21151 return GEN_INT (INTVAL (countreg
) / scale
);
21152 gcc_assert (REG_P (countreg
));
21154 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21155 GEN_INT (exact_log2 (scale
)),
21156 NULL
, 1, OPTAB_DIRECT
);
21160 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21161 DImode for constant loop counts. */
21163 static enum machine_mode
21164 counter_mode (rtx count_exp
)
21166 if (GET_MODE (count_exp
) != VOIDmode
)
21167 return GET_MODE (count_exp
);
21168 if (!CONST_INT_P (count_exp
))
21170 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21175 /* When SRCPTR is non-NULL, output simple loop to move memory
21176 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21177 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21178 equivalent loop to set memory by VALUE (supposed to be in MODE).
21180 The size is rounded down to whole number of chunk size moved at once.
21181 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21185 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21186 rtx destptr
, rtx srcptr
, rtx value
,
21187 rtx count
, enum machine_mode mode
, int unroll
,
21190 rtx out_label
, top_label
, iter
, tmp
;
21191 enum machine_mode iter_mode
= counter_mode (count
);
21192 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21193 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21199 top_label
= gen_label_rtx ();
21200 out_label
= gen_label_rtx ();
21201 iter
= gen_reg_rtx (iter_mode
);
21203 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21204 NULL
, 1, OPTAB_DIRECT
);
21205 /* Those two should combine. */
21206 if (piece_size
== const1_rtx
)
21208 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21210 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21212 emit_move_insn (iter
, const0_rtx
);
21214 emit_label (top_label
);
21216 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21217 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21218 destmem
= change_address (destmem
, mode
, x_addr
);
21222 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21223 srcmem
= change_address (srcmem
, mode
, y_addr
);
21225 /* When unrolling for chips that reorder memory reads and writes,
21226 we can save registers by using single temporary.
21227 Also using 4 temporaries is overkill in 32bit mode. */
21228 if (!TARGET_64BIT
&& 0)
21230 for (i
= 0; i
< unroll
; i
++)
21235 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21237 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21239 emit_move_insn (destmem
, srcmem
);
21245 gcc_assert (unroll
<= 4);
21246 for (i
= 0; i
< unroll
; i
++)
21248 tmpreg
[i
] = gen_reg_rtx (mode
);
21252 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21254 emit_move_insn (tmpreg
[i
], srcmem
);
21256 for (i
= 0; i
< unroll
; i
++)
21261 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21263 emit_move_insn (destmem
, tmpreg
[i
]);
21268 for (i
= 0; i
< unroll
; i
++)
21272 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21273 emit_move_insn (destmem
, value
);
21276 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21277 true, OPTAB_LIB_WIDEN
);
21279 emit_move_insn (iter
, tmp
);
21281 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21283 if (expected_size
!= -1)
21285 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21286 if (expected_size
== 0)
21288 else if (expected_size
> REG_BR_PROB_BASE
)
21289 predict_jump (REG_BR_PROB_BASE
- 1);
21291 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21294 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21295 iter
= ix86_zero_extend_to_Pmode (iter
);
21296 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21297 true, OPTAB_LIB_WIDEN
);
21298 if (tmp
!= destptr
)
21299 emit_move_insn (destptr
, tmp
);
21302 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21303 true, OPTAB_LIB_WIDEN
);
21305 emit_move_insn (srcptr
, tmp
);
21307 emit_label (out_label
);
21310 /* Output "rep; mov" instruction.
21311 Arguments have same meaning as for previous function */
21313 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21314 rtx destptr
, rtx srcptr
,
21316 enum machine_mode mode
)
21321 HOST_WIDE_INT rounded_count
;
21323 /* If the size is known, it is shorter to use rep movs. */
21324 if (mode
== QImode
&& CONST_INT_P (count
)
21325 && !(INTVAL (count
) & 3))
21328 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21329 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21330 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21331 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21332 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21333 if (mode
!= QImode
)
21335 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21336 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21337 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21338 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21339 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21340 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21344 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21345 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21347 if (CONST_INT_P (count
))
21349 rounded_count
= (INTVAL (count
)
21350 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21351 destmem
= shallow_copy_rtx (destmem
);
21352 srcmem
= shallow_copy_rtx (srcmem
);
21353 set_mem_size (destmem
, rounded_count
);
21354 set_mem_size (srcmem
, rounded_count
);
21358 if (MEM_SIZE_KNOWN_P (destmem
))
21359 clear_mem_size (destmem
);
21360 if (MEM_SIZE_KNOWN_P (srcmem
))
21361 clear_mem_size (srcmem
);
21363 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21367 /* Output "rep; stos" instruction.
21368 Arguments have same meaning as for previous function */
21370 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21371 rtx count
, enum machine_mode mode
,
21376 HOST_WIDE_INT rounded_count
;
21378 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21379 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21380 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21381 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21382 if (mode
!= QImode
)
21384 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21385 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21386 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21389 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21390 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21392 rounded_count
= (INTVAL (count
)
21393 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21394 destmem
= shallow_copy_rtx (destmem
);
21395 set_mem_size (destmem
, rounded_count
);
21397 else if (MEM_SIZE_KNOWN_P (destmem
))
21398 clear_mem_size (destmem
);
21399 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21403 emit_strmov (rtx destmem
, rtx srcmem
,
21404 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21406 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21407 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21408 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21411 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21413 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21414 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21417 if (CONST_INT_P (count
))
21419 HOST_WIDE_INT countval
= INTVAL (count
);
21422 if ((countval
& 0x10) && max_size
> 16)
21426 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21427 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21430 gcc_unreachable ();
21433 if ((countval
& 0x08) && max_size
> 8)
21436 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21439 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21440 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21444 if ((countval
& 0x04) && max_size
> 4)
21446 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21449 if ((countval
& 0x02) && max_size
> 2)
21451 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21454 if ((countval
& 0x01) && max_size
> 1)
21456 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21463 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21464 count
, 1, OPTAB_DIRECT
);
21465 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
21466 count
, QImode
, 1, 4);
21470 /* When there are stringops, we can cheaply increase dest and src pointers.
21471 Otherwise we save code size by maintaining offset (zero is readily
21472 available from preceding rep operation) and using x86 addressing modes.
21474 if (TARGET_SINGLE_STRINGOP
)
21478 rtx label
= ix86_expand_aligntest (count
, 4, true);
21479 src
= change_address (srcmem
, SImode
, srcptr
);
21480 dest
= change_address (destmem
, SImode
, destptr
);
21481 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21482 emit_label (label
);
21483 LABEL_NUSES (label
) = 1;
21487 rtx label
= ix86_expand_aligntest (count
, 2, true);
21488 src
= change_address (srcmem
, HImode
, srcptr
);
21489 dest
= change_address (destmem
, HImode
, destptr
);
21490 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21491 emit_label (label
);
21492 LABEL_NUSES (label
) = 1;
21496 rtx label
= ix86_expand_aligntest (count
, 1, true);
21497 src
= change_address (srcmem
, QImode
, srcptr
);
21498 dest
= change_address (destmem
, QImode
, destptr
);
21499 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21500 emit_label (label
);
21501 LABEL_NUSES (label
) = 1;
21506 rtx offset
= force_reg (Pmode
, const0_rtx
);
21511 rtx label
= ix86_expand_aligntest (count
, 4, true);
21512 src
= change_address (srcmem
, SImode
, srcptr
);
21513 dest
= change_address (destmem
, SImode
, destptr
);
21514 emit_move_insn (dest
, src
);
21515 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21516 true, OPTAB_LIB_WIDEN
);
21518 emit_move_insn (offset
, tmp
);
21519 emit_label (label
);
21520 LABEL_NUSES (label
) = 1;
21524 rtx label
= ix86_expand_aligntest (count
, 2, true);
21525 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21526 src
= change_address (srcmem
, HImode
, tmp
);
21527 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21528 dest
= change_address (destmem
, HImode
, tmp
);
21529 emit_move_insn (dest
, src
);
21530 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
21531 true, OPTAB_LIB_WIDEN
);
21533 emit_move_insn (offset
, tmp
);
21534 emit_label (label
);
21535 LABEL_NUSES (label
) = 1;
21539 rtx label
= ix86_expand_aligntest (count
, 1, true);
21540 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21541 src
= change_address (srcmem
, QImode
, tmp
);
21542 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21543 dest
= change_address (destmem
, QImode
, tmp
);
21544 emit_move_insn (dest
, src
);
21545 emit_label (label
);
21546 LABEL_NUSES (label
) = 1;
21551 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21553 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
21554 rtx count
, int max_size
)
21557 expand_simple_binop (counter_mode (count
), AND
, count
,
21558 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
21559 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
21560 gen_lowpart (QImode
, value
), count
, QImode
,
21564 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21566 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
21570 if (CONST_INT_P (count
))
21572 HOST_WIDE_INT countval
= INTVAL (count
);
21575 if ((countval
& 0x10) && max_size
> 16)
21579 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21580 emit_insn (gen_strset (destptr
, dest
, value
));
21581 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
21582 emit_insn (gen_strset (destptr
, dest
, value
));
21585 gcc_unreachable ();
21588 if ((countval
& 0x08) && max_size
> 8)
21592 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21593 emit_insn (gen_strset (destptr
, dest
, value
));
21597 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21598 emit_insn (gen_strset (destptr
, dest
, value
));
21599 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
21600 emit_insn (gen_strset (destptr
, dest
, value
));
21604 if ((countval
& 0x04) && max_size
> 4)
21606 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21607 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
21610 if ((countval
& 0x02) && max_size
> 2)
21612 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
21613 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
21616 if ((countval
& 0x01) && max_size
> 1)
21618 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
21619 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
21626 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
21631 rtx label
= ix86_expand_aligntest (count
, 16, true);
21634 dest
= change_address (destmem
, DImode
, destptr
);
21635 emit_insn (gen_strset (destptr
, dest
, value
));
21636 emit_insn (gen_strset (destptr
, dest
, value
));
21640 dest
= change_address (destmem
, SImode
, destptr
);
21641 emit_insn (gen_strset (destptr
, dest
, value
));
21642 emit_insn (gen_strset (destptr
, dest
, value
));
21643 emit_insn (gen_strset (destptr
, dest
, value
));
21644 emit_insn (gen_strset (destptr
, dest
, value
));
21646 emit_label (label
);
21647 LABEL_NUSES (label
) = 1;
21651 rtx label
= ix86_expand_aligntest (count
, 8, true);
21654 dest
= change_address (destmem
, DImode
, destptr
);
21655 emit_insn (gen_strset (destptr
, dest
, value
));
21659 dest
= change_address (destmem
, SImode
, destptr
);
21660 emit_insn (gen_strset (destptr
, dest
, value
));
21661 emit_insn (gen_strset (destptr
, dest
, value
));
21663 emit_label (label
);
21664 LABEL_NUSES (label
) = 1;
21668 rtx label
= ix86_expand_aligntest (count
, 4, true);
21669 dest
= change_address (destmem
, SImode
, destptr
);
21670 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
21671 emit_label (label
);
21672 LABEL_NUSES (label
) = 1;
21676 rtx label
= ix86_expand_aligntest (count
, 2, true);
21677 dest
= change_address (destmem
, HImode
, destptr
);
21678 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
21679 emit_label (label
);
21680 LABEL_NUSES (label
) = 1;
21684 rtx label
= ix86_expand_aligntest (count
, 1, true);
21685 dest
= change_address (destmem
, QImode
, destptr
);
21686 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
21687 emit_label (label
);
21688 LABEL_NUSES (label
) = 1;
21692 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
21693 DESIRED_ALIGNMENT. */
21695 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
21696 rtx destptr
, rtx srcptr
, rtx count
,
21697 int align
, int desired_alignment
)
21699 if (align
<= 1 && desired_alignment
> 1)
21701 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21702 srcmem
= change_address (srcmem
, QImode
, srcptr
);
21703 destmem
= change_address (destmem
, QImode
, destptr
);
21704 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21705 ix86_adjust_counter (count
, 1);
21706 emit_label (label
);
21707 LABEL_NUSES (label
) = 1;
21709 if (align
<= 2 && desired_alignment
> 2)
21711 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21712 srcmem
= change_address (srcmem
, HImode
, srcptr
);
21713 destmem
= change_address (destmem
, HImode
, destptr
);
21714 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21715 ix86_adjust_counter (count
, 2);
21716 emit_label (label
);
21717 LABEL_NUSES (label
) = 1;
21719 if (align
<= 4 && desired_alignment
> 4)
21721 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
21722 srcmem
= change_address (srcmem
, SImode
, srcptr
);
21723 destmem
= change_address (destmem
, SImode
, destptr
);
21724 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21725 ix86_adjust_counter (count
, 4);
21726 emit_label (label
);
21727 LABEL_NUSES (label
) = 1;
21729 gcc_assert (desired_alignment
<= 8);
21732 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
21733 ALIGN_BYTES is how many bytes need to be copied. */
21735 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
21736 int desired_align
, int align_bytes
)
21739 rtx orig_dst
= dst
;
21740 rtx orig_src
= src
;
21742 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
21743 if (src_align_bytes
>= 0)
21744 src_align_bytes
= desired_align
- src_align_bytes
;
21745 if (align_bytes
& 1)
21747 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
21748 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
21750 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21752 if (align_bytes
& 2)
21754 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
21755 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
21756 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
21757 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
21758 if (src_align_bytes
>= 0
21759 && (src_align_bytes
& 1) == (align_bytes
& 1)
21760 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
21761 set_mem_align (src
, 2 * BITS_PER_UNIT
);
21763 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21765 if (align_bytes
& 4)
21767 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
21768 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
21769 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
21770 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
21771 if (src_align_bytes
>= 0)
21773 unsigned int src_align
= 0;
21774 if ((src_align_bytes
& 3) == (align_bytes
& 3))
21776 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21778 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21779 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21782 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21784 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
21785 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
21786 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
21787 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
21788 if (src_align_bytes
>= 0)
21790 unsigned int src_align
= 0;
21791 if ((src_align_bytes
& 7) == (align_bytes
& 7))
21793 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
21795 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21797 if (src_align
> (unsigned int) desired_align
)
21798 src_align
= desired_align
;
21799 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21800 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21802 if (MEM_SIZE_KNOWN_P (orig_dst
))
21803 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
21804 if (MEM_SIZE_KNOWN_P (orig_src
))
21805 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
21810 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
21811 DESIRED_ALIGNMENT. */
21813 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
21814 int align
, int desired_alignment
)
21816 if (align
<= 1 && desired_alignment
> 1)
21818 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21819 destmem
= change_address (destmem
, QImode
, destptr
);
21820 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
21821 ix86_adjust_counter (count
, 1);
21822 emit_label (label
);
21823 LABEL_NUSES (label
) = 1;
21825 if (align
<= 2 && desired_alignment
> 2)
21827 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21828 destmem
= change_address (destmem
, HImode
, destptr
);
21829 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
21830 ix86_adjust_counter (count
, 2);
21831 emit_label (label
);
21832 LABEL_NUSES (label
) = 1;
21834 if (align
<= 4 && desired_alignment
> 4)
21836 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
21837 destmem
= change_address (destmem
, SImode
, destptr
);
21838 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
21839 ix86_adjust_counter (count
, 4);
21840 emit_label (label
);
21841 LABEL_NUSES (label
) = 1;
21843 gcc_assert (desired_alignment
<= 8);
21846 /* Set enough from DST to align DST known to by aligned by ALIGN to
21847 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21849 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
21850 int desired_align
, int align_bytes
)
21853 rtx orig_dst
= dst
;
21854 if (align_bytes
& 1)
21856 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
21858 emit_insn (gen_strset (destreg
, dst
,
21859 gen_lowpart (QImode
, value
)));
21861 if (align_bytes
& 2)
21863 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
21864 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
21865 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
21867 emit_insn (gen_strset (destreg
, dst
,
21868 gen_lowpart (HImode
, value
)));
21870 if (align_bytes
& 4)
21872 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
21873 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
21874 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
21876 emit_insn (gen_strset (destreg
, dst
,
21877 gen_lowpart (SImode
, value
)));
21879 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
21880 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
21881 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
21882 if (MEM_SIZE_KNOWN_P (orig_dst
))
21883 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
21887 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21888 static enum stringop_alg
21889 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
21890 int *dynamic_check
)
21892 const struct stringop_algs
* algs
;
21893 bool optimize_for_speed
;
21894 /* Algorithms using the rep prefix want at least edi and ecx;
21895 additionally, memset wants eax and memcpy wants esi. Don't
21896 consider such algorithms if the user has appropriated those
21897 registers for their own purposes. */
21898 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
21900 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
21902 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21903 || (alg != rep_prefix_1_byte \
21904 && alg != rep_prefix_4_byte \
21905 && alg != rep_prefix_8_byte))
21906 const struct processor_costs
*cost
;
21908 /* Even if the string operation call is cold, we still might spend a lot
21909 of time processing large blocks. */
21910 if (optimize_function_for_size_p (cfun
)
21911 || (optimize_insn_for_size_p ()
21912 && expected_size
!= -1 && expected_size
< 256))
21913 optimize_for_speed
= false;
21915 optimize_for_speed
= true;
21917 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
21919 *dynamic_check
= -1;
21921 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
21923 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
21924 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
21925 return ix86_stringop_alg
;
21926 /* rep; movq or rep; movl is the smallest variant. */
21927 else if (!optimize_for_speed
)
21929 if (!count
|| (count
& 3))
21930 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
21932 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
21934 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
21936 else if (expected_size
!= -1 && expected_size
< 4)
21937 return loop_1_byte
;
21938 else if (expected_size
!= -1)
21941 enum stringop_alg alg
= libcall
;
21942 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
21944 /* We get here if the algorithms that were not libcall-based
21945 were rep-prefix based and we are unable to use rep prefixes
21946 based on global register usage. Break out of the loop and
21947 use the heuristic below. */
21948 if (algs
->size
[i
].max
== 0)
21950 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
21952 enum stringop_alg candidate
= algs
->size
[i
].alg
;
21954 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
21956 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21957 last non-libcall inline algorithm. */
21958 if (TARGET_INLINE_ALL_STRINGOPS
)
21960 /* When the current size is best to be copied by a libcall,
21961 but we are still forced to inline, run the heuristic below
21962 that will pick code for medium sized blocks. */
21963 if (alg
!= libcall
)
21967 else if (ALG_USABLE_P (candidate
))
21971 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
21973 /* When asked to inline the call anyway, try to pick meaningful choice.
21974 We look for maximal size of block that is faster to copy by hand and
21975 take blocks of at most of that size guessing that average size will
21976 be roughly half of the block.
21978 If this turns out to be bad, we might simply specify the preferred
21979 choice in ix86_costs. */
21980 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
21981 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
21984 enum stringop_alg alg
;
21986 bool any_alg_usable_p
= true;
21988 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
21990 enum stringop_alg candidate
= algs
->size
[i
].alg
;
21991 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
21993 if (candidate
!= libcall
&& candidate
21994 && ALG_USABLE_P (candidate
))
21995 max
= algs
->size
[i
].max
;
21997 /* If there aren't any usable algorithms, then recursing on
21998 smaller sizes isn't going to find anything. Just return the
21999 simple byte-at-a-time copy loop. */
22000 if (!any_alg_usable_p
)
22002 /* Pick something reasonable. */
22003 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22004 *dynamic_check
= 128;
22005 return loop_1_byte
;
22009 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
22010 gcc_assert (*dynamic_check
== -1);
22011 gcc_assert (alg
!= libcall
);
22012 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22013 *dynamic_check
= max
;
22016 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22017 #undef ALG_USABLE_P
22020 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22021 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22023 decide_alignment (int align
,
22024 enum stringop_alg alg
,
22027 int desired_align
= 0;
22031 gcc_unreachable ();
22033 case unrolled_loop
:
22034 desired_align
= GET_MODE_SIZE (Pmode
);
22036 case rep_prefix_8_byte
:
22039 case rep_prefix_4_byte
:
22040 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22041 copying whole cacheline at once. */
22042 if (TARGET_PENTIUMPRO
)
22047 case rep_prefix_1_byte
:
22048 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22049 copying whole cacheline at once. */
22050 if (TARGET_PENTIUMPRO
)
22064 if (desired_align
< align
)
22065 desired_align
= align
;
22066 if (expected_size
!= -1 && expected_size
< 4)
22067 desired_align
= align
;
22068 return desired_align
;
/* Return the smallest power of 2 greater than VAL.
   The result is clamped below at 64, which is always greater than the
   epilogue sizes this is called with (MAX (size_needed - 1,
   desired_align - align)), so the "greater than" of the contract holds
   for all actual arguments.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 64;
  while (ret < val)
    ret <<= 1;
  return ret;
}
22081 /* Expand string move (memcpy) operation. Use i386 string operations
22082 when profitable. expand_setmem contains similar code. The code
22083 depends upon architecture, block size and alignment, but always has
22084 the same overall structure:
22086 1) Prologue guard: Conditional that jumps up to epilogues for small
22087 blocks that can be handled by epilogue alone. This is faster
22088 but also needed for correctness, since prologue assume the block
22089 is larger than the desired alignment.
22091 Optional dynamic check for size and libcall for large
22092 blocks is emitted here too, with -minline-stringops-dynamically.
22094 2) Prologue: copy first few bytes in order to get destination
22095 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22096 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22097 copied. We emit either a jump tree on power of two sized
22098 blocks, or a byte loop.
22100 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22101 with specified algorithm.
22103 4) Epilogue: code copying tail of the block that is too small to be
22104 handled by main body (or up to size guarded by prologue guard). */
22107 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22108 rtx expected_align_exp
, rtx expected_size_exp
)
22114 rtx jump_around_label
= NULL
;
22115 HOST_WIDE_INT align
= 1;
22116 unsigned HOST_WIDE_INT count
= 0;
22117 HOST_WIDE_INT expected_size
= -1;
22118 int size_needed
= 0, epilogue_size_needed
;
22119 int desired_align
= 0, align_bytes
= 0;
22120 enum stringop_alg alg
;
22122 bool need_zero_guard
= false;
22124 if (CONST_INT_P (align_exp
))
22125 align
= INTVAL (align_exp
);
22126 /* i386 can do misaligned access on reasonably increased cost. */
22127 if (CONST_INT_P (expected_align_exp
)
22128 && INTVAL (expected_align_exp
) > align
)
22129 align
= INTVAL (expected_align_exp
);
22130 /* ALIGN is the minimum of destination and source alignment, but we care here
22131 just about destination alignment. */
22132 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22133 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22135 if (CONST_INT_P (count_exp
))
22136 count
= expected_size
= INTVAL (count_exp
);
22137 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22138 expected_size
= INTVAL (expected_size_exp
);
22140 /* Make sure we don't need to care about overflow later on. */
22141 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22144 /* Step 0: Decide on preferred algorithm, desired alignment and
22145 size of chunks to be copied by main loop. */
22147 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
22148 desired_align
= decide_alignment (align
, alg
, expected_size
);
22150 if (!TARGET_ALIGN_STRINGOPS
)
22151 align
= desired_align
;
22153 if (alg
== libcall
)
22155 gcc_assert (alg
!= no_stringop
);
22157 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22158 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22159 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22164 gcc_unreachable ();
22166 need_zero_guard
= true;
22167 size_needed
= GET_MODE_SIZE (word_mode
);
22169 case unrolled_loop
:
22170 need_zero_guard
= true;
22171 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22173 case rep_prefix_8_byte
:
22176 case rep_prefix_4_byte
:
22179 case rep_prefix_1_byte
:
22183 need_zero_guard
= true;
22188 epilogue_size_needed
= size_needed
;
22190 /* Step 1: Prologue guard. */
22192 /* Alignment code needs count to be in register. */
22193 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22195 if (INTVAL (count_exp
) > desired_align
22196 && INTVAL (count_exp
) > size_needed
)
22199 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22200 if (align_bytes
<= 0)
22203 align_bytes
= desired_align
- align_bytes
;
22205 if (align_bytes
== 0)
22206 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22208 gcc_assert (desired_align
>= 1 && align
>= 1);
22210 /* Ensure that alignment prologue won't copy past end of block. */
22211 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22213 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22214 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22215 Make sure it is power of 2. */
22216 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22220 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22222 /* If main algorithm works on QImode, no epilogue is needed.
22223 For small sizes just don't align anything. */
22224 if (size_needed
== 1)
22225 desired_align
= align
;
22232 label
= gen_label_rtx ();
22233 emit_cmp_and_jump_insns (count_exp
,
22234 GEN_INT (epilogue_size_needed
),
22235 LTU
, 0, counter_mode (count_exp
), 1, label
);
22236 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22237 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22239 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22243 /* Emit code to decide on runtime whether library call or inline should be
22245 if (dynamic_check
!= -1)
22247 if (CONST_INT_P (count_exp
))
22249 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22251 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22252 count_exp
= const0_rtx
;
22258 rtx hot_label
= gen_label_rtx ();
22259 jump_around_label
= gen_label_rtx ();
22260 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22261 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22262 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22263 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22264 emit_jump (jump_around_label
);
22265 emit_label (hot_label
);
22269 /* Step 2: Alignment prologue. */
22271 if (desired_align
> align
)
22273 if (align_bytes
== 0)
22275 /* Except for the first move in epilogue, we no longer know
22276 constant offset in aliasing info. It don't seems to worth
22277 the pain to maintain it for the first move, so throw away
22279 src
= change_address (src
, BLKmode
, srcreg
);
22280 dst
= change_address (dst
, BLKmode
, destreg
);
22281 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22286 /* If we know how many bytes need to be stored before dst is
22287 sufficiently aligned, maintain aliasing info accurately. */
22288 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22289 desired_align
, align_bytes
);
22290 count_exp
= plus_constant (counter_mode (count_exp
),
22291 count_exp
, -align_bytes
);
22292 count
-= align_bytes
;
22294 if (need_zero_guard
22295 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22296 || (align_bytes
== 0
22297 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22298 + desired_align
- align
))))
22300 /* It is possible that we copied enough so the main loop will not
22302 gcc_assert (size_needed
> 1);
22303 if (label
== NULL_RTX
)
22304 label
= gen_label_rtx ();
22305 emit_cmp_and_jump_insns (count_exp
,
22306 GEN_INT (size_needed
),
22307 LTU
, 0, counter_mode (count_exp
), 1, label
);
22308 if (expected_size
== -1
22309 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22310 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22312 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22315 if (label
&& size_needed
== 1)
22317 emit_label (label
);
22318 LABEL_NUSES (label
) = 1;
22320 epilogue_size_needed
= 1;
22322 else if (label
== NULL_RTX
)
22323 epilogue_size_needed
= size_needed
;
22325 /* Step 3: Main loop. */
22331 gcc_unreachable ();
22333 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22334 count_exp
, QImode
, 1, expected_size
);
22337 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22338 count_exp
, word_mode
, 1, expected_size
);
22340 case unrolled_loop
:
22341 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22342 registers for 4 temporaries anyway. */
22343 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22344 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22347 case rep_prefix_8_byte
:
22348 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22351 case rep_prefix_4_byte
:
22352 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22355 case rep_prefix_1_byte
:
22356 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22360 /* Adjust properly the offset of src and dest memory for aliasing. */
22361 if (CONST_INT_P (count_exp
))
22363 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22364 (count
/ size_needed
) * size_needed
);
22365 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22366 (count
/ size_needed
) * size_needed
);
22370 src
= change_address (src
, BLKmode
, srcreg
);
22371 dst
= change_address (dst
, BLKmode
, destreg
);
22374 /* Step 4: Epilogue to copy the remaining bytes. */
22378 /* When the main loop is done, COUNT_EXP might hold original count,
22379 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22380 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22381 bytes. Compensate if needed. */
22383 if (size_needed
< epilogue_size_needed
)
22386 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22387 GEN_INT (size_needed
- 1), count_exp
, 1,
22389 if (tmp
!= count_exp
)
22390 emit_move_insn (count_exp
, tmp
);
22392 emit_label (label
);
22393 LABEL_NUSES (label
) = 1;
22396 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22397 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22398 epilogue_size_needed
);
22399 if (jump_around_label
)
22400 emit_label (jump_around_label
);
22404 /* Helper function for memcpy. For QImode value 0xXY produce
22405 0xXYXYXYXY of wide specified by MODE. This is essentially
22406 a * 0x10101010, but we can do slightly better than
22407 synth_mult by unwinding the sequence by hand on CPUs with
22410 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22412 enum machine_mode valmode
= GET_MODE (val
);
22414 int nops
= mode
== DImode
? 3 : 2;
22416 gcc_assert (mode
== SImode
|| mode
== DImode
);
22417 if (val
== const0_rtx
)
22418 return copy_to_mode_reg (mode
, const0_rtx
);
22419 if (CONST_INT_P (val
))
22421 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22425 if (mode
== DImode
)
22426 v
|= (v
<< 16) << 16;
22427 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22430 if (valmode
== VOIDmode
)
22432 if (valmode
!= QImode
)
22433 val
= gen_lowpart (QImode
, val
);
22434 if (mode
== QImode
)
22436 if (!TARGET_PARTIAL_REG_STALL
)
22438 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22439 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22440 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22441 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22443 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22444 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22445 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22450 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22452 if (!TARGET_PARTIAL_REG_STALL
)
22453 if (mode
== SImode
)
22454 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22456 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22459 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22460 NULL
, 1, OPTAB_DIRECT
);
22462 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22464 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
22465 NULL
, 1, OPTAB_DIRECT
);
22466 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22467 if (mode
== SImode
)
22469 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
22470 NULL
, 1, OPTAB_DIRECT
);
22471 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22476 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22477 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22478 alignment from ALIGN to DESIRED_ALIGN. */
22480 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22485 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
22486 promoted_val
= promote_duplicated_reg (DImode
, val
);
22487 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22488 promoted_val
= promote_duplicated_reg (SImode
, val
);
22489 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22490 promoted_val
= promote_duplicated_reg (HImode
, val
);
22492 promoted_val
= val
;
22494 return promoted_val
;
22497 /* Expand string clear operation (bzero). Use i386 string operations when
22498 profitable. See expand_movmem comment for explanation of individual
22499 steps performed. */
22501 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22502 rtx expected_align_exp
, rtx expected_size_exp
)
22507 rtx jump_around_label
= NULL
;
22508 HOST_WIDE_INT align
= 1;
22509 unsigned HOST_WIDE_INT count
= 0;
22510 HOST_WIDE_INT expected_size
= -1;
22511 int size_needed
= 0, epilogue_size_needed
;
22512 int desired_align
= 0, align_bytes
= 0;
22513 enum stringop_alg alg
;
22514 rtx promoted_val
= NULL
;
22515 bool force_loopy_epilogue
= false;
22517 bool need_zero_guard
= false;
22519 if (CONST_INT_P (align_exp
))
22520 align
= INTVAL (align_exp
);
22521 /* i386 can do misaligned access on reasonably increased cost. */
22522 if (CONST_INT_P (expected_align_exp
)
22523 && INTVAL (expected_align_exp
) > align
)
22524 align
= INTVAL (expected_align_exp
);
22525 if (CONST_INT_P (count_exp
))
22526 count
= expected_size
= INTVAL (count_exp
);
22527 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22528 expected_size
= INTVAL (expected_size_exp
);
22530 /* Make sure we don't need to care about overflow later on. */
22531 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22534 /* Step 0: Decide on preferred algorithm, desired alignment and
22535 size of chunks to be copied by main loop. */
22537 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
22538 desired_align
= decide_alignment (align
, alg
, expected_size
);
22540 if (!TARGET_ALIGN_STRINGOPS
)
22541 align
= desired_align
;
22543 if (alg
== libcall
)
22545 gcc_assert (alg
!= no_stringop
);
22547 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
22548 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22553 gcc_unreachable ();
22555 need_zero_guard
= true;
22556 size_needed
= GET_MODE_SIZE (word_mode
);
22558 case unrolled_loop
:
22559 need_zero_guard
= true;
22560 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
22562 case rep_prefix_8_byte
:
22565 case rep_prefix_4_byte
:
22568 case rep_prefix_1_byte
:
22572 need_zero_guard
= true;
22576 epilogue_size_needed
= size_needed
;
22578 /* Step 1: Prologue guard. */
22580 /* Alignment code needs count to be in register. */
22581 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22583 if (INTVAL (count_exp
) > desired_align
22584 && INTVAL (count_exp
) > size_needed
)
22587 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22588 if (align_bytes
<= 0)
22591 align_bytes
= desired_align
- align_bytes
;
22593 if (align_bytes
== 0)
22595 enum machine_mode mode
= SImode
;
22596 if (TARGET_64BIT
&& (count
& ~0xffffffff))
22598 count_exp
= force_reg (mode
, count_exp
);
22601 /* Do the cheap promotion to allow better CSE across the
22602 main loop and epilogue (ie one load of the big constant in the
22603 front of all code. */
22604 if (CONST_INT_P (val_exp
))
22605 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22606 desired_align
, align
);
22607 /* Ensure that alignment prologue won't copy past end of block. */
22608 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22610 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22611 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22612 Make sure it is power of 2. */
22613 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22615 /* To improve performance of small blocks, we jump around the VAL
22616 promoting mode. This mean that if the promoted VAL is not constant,
22617 we might not use it in the epilogue and have to use byte
22619 if (epilogue_size_needed
> 2 && !promoted_val
)
22620 force_loopy_epilogue
= true;
22623 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22625 /* If main algorithm works on QImode, no epilogue is needed.
22626 For small sizes just don't align anything. */
22627 if (size_needed
== 1)
22628 desired_align
= align
;
22635 label
= gen_label_rtx ();
22636 emit_cmp_and_jump_insns (count_exp
,
22637 GEN_INT (epilogue_size_needed
),
22638 LTU
, 0, counter_mode (count_exp
), 1, label
);
22639 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
22640 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22642 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22645 if (dynamic_check
!= -1)
22647 rtx hot_label
= gen_label_rtx ();
22648 jump_around_label
= gen_label_rtx ();
22649 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22650 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
22651 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22652 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
22653 emit_jump (jump_around_label
);
22654 emit_label (hot_label
);
22657 /* Step 2: Alignment prologue. */
22659 /* Do the expensive promotion once we branched off the small blocks. */
22661 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22662 desired_align
, align
);
22663 gcc_assert (desired_align
>= 1 && align
>= 1);
22665 if (desired_align
> align
)
22667 if (align_bytes
== 0)
22669 /* Except for the first move in epilogue, we no longer know
22670 constant offset in aliasing info. It don't seems to worth
22671 the pain to maintain it for the first move, so throw away
22673 dst
= change_address (dst
, BLKmode
, destreg
);
22674 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
22679 /* If we know how many bytes need to be stored before dst is
22680 sufficiently aligned, maintain aliasing info accurately. */
22681 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
22682 desired_align
, align_bytes
);
22683 count_exp
= plus_constant (counter_mode (count_exp
),
22684 count_exp
, -align_bytes
);
22685 count
-= align_bytes
;
22687 if (need_zero_guard
22688 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22689 || (align_bytes
== 0
22690 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22691 + desired_align
- align
))))
22693 /* It is possible that we copied enough so the main loop will not
22695 gcc_assert (size_needed
> 1);
22696 if (label
== NULL_RTX
)
22697 label
= gen_label_rtx ();
22698 emit_cmp_and_jump_insns (count_exp
,
22699 GEN_INT (size_needed
),
22700 LTU
, 0, counter_mode (count_exp
), 1, label
);
22701 if (expected_size
== -1
22702 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22703 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22705 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22708 if (label
&& size_needed
== 1)
22710 emit_label (label
);
22711 LABEL_NUSES (label
) = 1;
22713 promoted_val
= val_exp
;
22714 epilogue_size_needed
= 1;
22716 else if (label
== NULL_RTX
)
22717 epilogue_size_needed
= size_needed
;
22719 /* Step 3: Main loop. */
22725 gcc_unreachable ();
22727 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22728 count_exp
, QImode
, 1, expected_size
);
22731 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22732 count_exp
, word_mode
, 1, expected_size
);
22734 case unrolled_loop
:
22735 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22736 count_exp
, word_mode
, 4, expected_size
);
22738 case rep_prefix_8_byte
:
22739 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22742 case rep_prefix_4_byte
:
22743 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22746 case rep_prefix_1_byte
:
22747 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22751 /* Adjust properly the offset of src and dest memory for aliasing. */
22752 if (CONST_INT_P (count_exp
))
22753 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22754 (count
/ size_needed
) * size_needed
);
22756 dst
= change_address (dst
, BLKmode
, destreg
);
22758 /* Step 4: Epilogue to copy the remaining bytes. */
22762 /* When the main loop is done, COUNT_EXP might hold original count,
22763 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22764 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22765 bytes. Compensate if needed. */
22767 if (size_needed
< epilogue_size_needed
)
22770 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22771 GEN_INT (size_needed
- 1), count_exp
, 1,
22773 if (tmp
!= count_exp
)
22774 emit_move_insn (count_exp
, tmp
);
22776 emit_label (label
);
22777 LABEL_NUSES (label
) = 1;
22780 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22782 if (force_loopy_epilogue
)
22783 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
22784 epilogue_size_needed
);
22786 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
22787 epilogue_size_needed
);
22789 if (jump_around_label
)
22790 emit_label (jump_around_label
);
22794 /* Expand the appropriate insns for doing strlen if not just doing
22797 out = result, initialized with the start address
22798 align_rtx = alignment of the address.
22799 scratch = scratch register, initialized with the startaddress when
22800 not aligned, otherwise undefined
22802 This is just the body. It needs the initializations mentioned above and
22803 some address computing at the end. These things are done in i386.md. */
22806 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
22810 rtx align_2_label
= NULL_RTX
;
22811 rtx align_3_label
= NULL_RTX
;
22812 rtx align_4_label
= gen_label_rtx ();
22813 rtx end_0_label
= gen_label_rtx ();
22815 rtx tmpreg
= gen_reg_rtx (SImode
);
22816 rtx scratch
= gen_reg_rtx (SImode
);
22820 if (CONST_INT_P (align_rtx
))
22821 align
= INTVAL (align_rtx
);
22823 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22825 /* Is there a known alignment and is it less than 4? */
22828 rtx scratch1
= gen_reg_rtx (Pmode
);
22829 emit_move_insn (scratch1
, out
);
22830 /* Is there a known alignment and is it not 2? */
22833 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
22834 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
22836 /* Leave just the 3 lower bits. */
22837 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
22838 NULL_RTX
, 0, OPTAB_WIDEN
);
22840 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
22841 Pmode
, 1, align_4_label
);
22842 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
22843 Pmode
, 1, align_2_label
);
22844 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
22845 Pmode
, 1, align_3_label
);
22849 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22850 check if is aligned to 4 - byte. */
22852 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
22853 NULL_RTX
, 0, OPTAB_WIDEN
);
22855 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
22856 Pmode
, 1, align_4_label
);
22859 mem
= change_address (src
, QImode
, out
);
22861 /* Now compare the bytes. */
22863 /* Compare the first n unaligned byte on a byte per byte basis. */
22864 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
22865 QImode
, 1, end_0_label
);
22867 /* Increment the address. */
22868 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
22870 /* Not needed with an alignment of 2 */
22873 emit_label (align_2_label
);
22875 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
22878 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
22880 emit_label (align_3_label
);
22883 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
22886 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
22889 /* Generate loop to check 4 bytes at a time. It is not a good idea to
22890 align this loop. It gives only huge programs, but does not help to
22892 emit_label (align_4_label
);
22894 mem
= change_address (src
, SImode
, out
);
22895 emit_move_insn (scratch
, mem
);
22896 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
22898 /* This formula yields a nonzero result iff one of the bytes is zero.
22899 This saves three branches inside loop and many cycles. */
22901 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
22902 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
22903 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
22904 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
22905 gen_int_mode (0x80808080, SImode
)));
22906 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
22911 rtx reg
= gen_reg_rtx (SImode
);
22912 rtx reg2
= gen_reg_rtx (Pmode
);
22913 emit_move_insn (reg
, tmpreg
);
22914 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
22916 /* If zero is not in the first two bytes, move two bytes forward. */
22917 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
22918 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22919 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
22920 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
22921 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
22924 /* Emit lea manually to avoid clobbering of flags. */
22925 emit_insn (gen_rtx_SET (SImode
, reg2
,
22926 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
22928 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22929 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
22930 emit_insn (gen_rtx_SET (VOIDmode
, out
,
22931 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
22937 rtx end_2_label
= gen_label_rtx ();
22938 /* Is zero in the first two bytes? */
22940 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
22941 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22942 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
22943 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
22944 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
22946 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
22947 JUMP_LABEL (tmp
) = end_2_label
;
22949 /* Not in the first two. Move two bytes forward. */
22950 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
22951 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
22953 emit_label (end_2_label
);
22957 /* Avoid branch in fixing the byte. */
22958 tmpreg
= gen_lowpart (QImode
, tmpreg
);
22959 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
22960 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
22961 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
22962 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
22964 emit_label (end_0_label
);
22967 /* Expand strlen. */
22970 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
22972 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
22974 /* The generic case of strlen expander is long. Avoid it's
22975 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
22977 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
22978 && !TARGET_INLINE_ALL_STRINGOPS
22979 && !optimize_insn_for_size_p ()
22980 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
22983 addr
= force_reg (Pmode
, XEXP (src
, 0));
22984 scratch1
= gen_reg_rtx (Pmode
);
22986 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
22987 && !optimize_insn_for_size_p ())
22989 /* Well it seems that some optimizer does not combine a call like
22990 foo(strlen(bar), strlen(bar));
22991 when the move and the subtraction is done here. It does calculate
22992 the length just once when these instructions are done inside of
22993 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
22994 often used and I use one fewer register for the lifetime of
22995 output_strlen_unroll() this is better. */
22997 emit_move_insn (out
, addr
);
22999 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23001 /* strlensi_unroll_1 returns the address of the zero at the end of
23002 the string, like memchr(), so compute the length by subtracting
23003 the start address. */
23004 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23010 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23011 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23014 scratch2
= gen_reg_rtx (Pmode
);
23015 scratch3
= gen_reg_rtx (Pmode
);
23016 scratch4
= force_reg (Pmode
, constm1_rtx
);
23018 emit_move_insn (scratch3
, addr
);
23019 eoschar
= force_reg (QImode
, eoschar
);
23021 src
= replace_equiv_address_nv (src
, scratch3
);
23023 /* If .md starts supporting :P, this can be done in .md. */
23024 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23025 scratch4
), UNSPEC_SCAS
);
23026 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23027 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23028 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23033 /* For given symbol (function) construct code to compute address of it's PLT
23034 entry in large x86-64 PIC model. */
23036 construct_plt_address (rtx symbol
)
23040 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23041 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23042 gcc_assert (Pmode
== DImode
);
23044 tmp
= gen_reg_rtx (Pmode
);
23045 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23047 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23048 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23053 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23055 rtx pop
, bool sibcall
)
23057 /* We need to represent that SI and DI registers are clobbered
23059 static int clobbered_registers
[] = {
23060 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23061 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23062 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23063 XMM15_REG
, SI_REG
, DI_REG
23065 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23066 rtx use
= NULL
, call
;
23067 unsigned int vec_len
;
23069 if (pop
== const0_rtx
)
23071 gcc_assert (!TARGET_64BIT
|| !pop
);
23073 if (TARGET_MACHO
&& !TARGET_64BIT
)
23076 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23077 fnaddr
= machopic_indirect_call_target (fnaddr
);
23082 /* Static functions and indirect calls don't need the pic register. */
23083 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23084 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23085 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23086 use_reg (&use
, pic_offset_table_rtx
);
23089 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23091 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23092 emit_move_insn (al
, callarg2
);
23093 use_reg (&use
, al
);
23096 if (ix86_cmodel
== CM_LARGE_PIC
23098 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23099 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23100 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23102 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23103 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23105 fnaddr
= XEXP (fnaddr
, 0);
23106 if (GET_MODE (fnaddr
) != word_mode
)
23107 fnaddr
= convert_to_mode (word_mode
, fnaddr
, 1);
23108 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23112 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23114 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23115 vec
[vec_len
++] = call
;
23119 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23120 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23121 vec
[vec_len
++] = pop
;
23124 if (TARGET_64BIT_MS_ABI
23125 && (!callarg2
|| INTVAL (callarg2
) != -2))
23129 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23130 UNSPEC_MS_TO_SYSV_CALL
);
23132 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23134 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
23136 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23138 clobbered_registers
[i
]));
23141 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23142 if (TARGET_VZEROUPPER
)
23145 if (cfun
->machine
->callee_pass_avx256_p
)
23147 if (cfun
->machine
->callee_return_avx256_p
)
23148 avx256
= callee_return_pass_avx256
;
23150 avx256
= callee_pass_avx256
;
23152 else if (cfun
->machine
->callee_return_avx256_p
)
23153 avx256
= callee_return_avx256
;
23155 avx256
= call_no_avx256
;
23157 if (reload_completed
)
23158 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
23160 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
,
23161 gen_rtvec (1, GEN_INT (avx256
)),
23162 UNSPEC_CALL_NEEDS_VZEROUPPER
);
23166 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23167 call
= emit_call_insn (call
);
23169 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23175 ix86_split_call_vzeroupper (rtx insn
, rtx vzeroupper
)
23177 rtx pat
= PATTERN (insn
);
23178 rtvec vec
= XVEC (pat
, 0);
23179 int len
= GET_NUM_ELEM (vec
) - 1;
23181 /* Strip off the last entry of the parallel. */
23182 gcc_assert (GET_CODE (RTVEC_ELT (vec
, len
)) == UNSPEC
);
23183 gcc_assert (XINT (RTVEC_ELT (vec
, len
), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER
);
23185 pat
= RTVEC_ELT (vec
, 0);
23187 pat
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (len
, &RTVEC_ELT (vec
, 0)));
23189 emit_insn (gen_avx_vzeroupper (vzeroupper
));
23190 emit_call_insn (pat
);
23193 /* Output the assembly for a call instruction. */
23196 ix86_output_call_insn (rtx insn
, rtx call_op
)
23198 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23199 bool seh_nop_p
= false;
23202 if (SIBLING_CALL_P (insn
))
23206 /* SEH epilogue detection requires the indirect branch case
23207 to include REX.W. */
23208 else if (TARGET_SEH
)
23209 xasm
= "rex.W jmp %A0";
23213 output_asm_insn (xasm
, &call_op
);
23217 /* SEH unwinding can require an extra nop to be emitted in several
23218 circumstances. Determine if we have one of those. */
23223 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23225 /* If we get to another real insn, we don't need the nop. */
23229 /* If we get to the epilogue note, prevent a catch region from
23230 being adjacent to the standard epilogue sequence. If non-
23231 call-exceptions, we'll have done this during epilogue emission. */
23232 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23233 && !flag_non_call_exceptions
23234 && !can_throw_internal (insn
))
23241 /* If we didn't find a real insn following the call, prevent the
23242 unwinder from looking into the next function. */
23248 xasm
= "call\t%P0";
23250 xasm
= "call\t%A0";
23252 output_asm_insn (xasm
, &call_op
);
23260 /* Clear stack slot assignments remembered from previous functions.
23261 This is called from INIT_EXPANDERS once before RTL is emitted for each
23264 static struct machine_function
*
23265 ix86_init_machine_status (void)
23267 struct machine_function
*f
;
23269 f
= ggc_alloc_cleared_machine_function ();
23270 f
->use_fast_prologue_epilogue_nregs
= -1;
23271 f
->tls_descriptor_call_expanded_p
= 0;
23272 f
->call_abi
= ix86_abi
;
23277 /* Return a MEM corresponding to a stack slot with mode MODE.
23278 Allocate a new slot if necessary.
23280 The RTL for a function can have several slots available: N is
23281 which slot to use. */
23284 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23286 struct stack_local_entry
*s
;
23288 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23290 /* Virtual slot is valid only before vregs are instantiated. */
23291 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
23293 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23294 if (s
->mode
== mode
&& s
->n
== n
)
23295 return validize_mem (copy_rtx (s
->rtl
));
23297 s
= ggc_alloc_stack_local_entry ();
23300 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23302 s
->next
= ix86_stack_locals
;
23303 ix86_stack_locals
= s
;
23304 return validize_mem (s
->rtl
);
23307 /* Calculate the length of the memory address in the instruction encoding.
23308 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23309 or other prefixes. */
23312 memory_address_length (rtx addr
)
23314 struct ix86_address parts
;
23315 rtx base
, index
, disp
;
23319 if (GET_CODE (addr
) == PRE_DEC
23320 || GET_CODE (addr
) == POST_INC
23321 || GET_CODE (addr
) == PRE_MODIFY
23322 || GET_CODE (addr
) == POST_MODIFY
)
23325 ok
= ix86_decompose_address (addr
, &parts
);
23328 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
23329 parts
.base
= SUBREG_REG (parts
.base
);
23330 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
23331 parts
.index
= SUBREG_REG (parts
.index
);
23334 index
= parts
.index
;
23337 /* Add length of addr32 prefix. */
23338 len
= (GET_CODE (addr
) == ZERO_EXTEND
23339 || GET_CODE (addr
) == AND
);
23342 - esp as the base always wants an index,
23343 - ebp as the base always wants a displacement,
23344 - r12 as the base always wants an index,
23345 - r13 as the base always wants a displacement. */
23347 /* Register Indirect. */
23348 if (base
&& !index
&& !disp
)
23350 /* esp (for its index) and ebp (for its displacement) need
23351 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23354 && (addr
== arg_pointer_rtx
23355 || addr
== frame_pointer_rtx
23356 || REGNO (addr
) == SP_REG
23357 || REGNO (addr
) == BP_REG
23358 || REGNO (addr
) == R12_REG
23359 || REGNO (addr
) == R13_REG
))
23363 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23364 is not disp32, but disp32(%rip), so for disp32
23365 SIB byte is needed, unless print_operand_address
23366 optimizes it into disp32(%rip) or (%rip) is implied
23368 else if (disp
&& !base
&& !index
)
23375 if (GET_CODE (disp
) == CONST
)
23376 symbol
= XEXP (disp
, 0);
23377 if (GET_CODE (symbol
) == PLUS
23378 && CONST_INT_P (XEXP (symbol
, 1)))
23379 symbol
= XEXP (symbol
, 0);
23381 if (GET_CODE (symbol
) != LABEL_REF
23382 && (GET_CODE (symbol
) != SYMBOL_REF
23383 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23384 && (GET_CODE (symbol
) != UNSPEC
23385 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23386 && XINT (symbol
, 1) != UNSPEC_PCREL
23387 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23394 /* Find the length of the displacement constant. */
23397 if (base
&& satisfies_constraint_K (disp
))
23402 /* ebp always wants a displacement. Similarly r13. */
23403 else if (base
&& REG_P (base
)
23404 && (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23407 /* An index requires the two-byte modrm form.... */
23409 /* ...like esp (or r12), which always wants an index. */
23410 || base
== arg_pointer_rtx
23411 || base
== frame_pointer_rtx
23412 || (base
&& REG_P (base
)
23413 && (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23430 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23431 is set, expect that insn have 8bit immediate alternative. */
23433 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23437 extract_insn_cached (insn
);
23438 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23439 if (CONSTANT_P (recog_data
.operand
[i
]))
23441 enum attr_mode mode
= get_attr_mode (insn
);
23444 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23446 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23453 ival
= trunc_int_for_mode (ival
, HImode
);
23456 ival
= trunc_int_for_mode (ival
, SImode
);
23461 if (IN_RANGE (ival
, -128, 127))
23478 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
23483 fatal_insn ("unknown insn mode", insn
);
23488 /* Compute default value for "length_address" attribute. */
23490 ix86_attr_length_address_default (rtx insn
)
23494 if (get_attr_type (insn
) == TYPE_LEA
)
23496 rtx set
= PATTERN (insn
), addr
;
23498 if (GET_CODE (set
) == PARALLEL
)
23499 set
= XVECEXP (set
, 0, 0);
23501 gcc_assert (GET_CODE (set
) == SET
);
23503 addr
= SET_SRC (set
);
23504 if (TARGET_64BIT
&& get_attr_mode (insn
) == MODE_SI
)
23506 if (GET_CODE (addr
) == ZERO_EXTEND
)
23507 addr
= XEXP (addr
, 0);
23508 if (GET_CODE (addr
) == SUBREG
)
23509 addr
= SUBREG_REG (addr
);
23512 return memory_address_length (addr
);
23515 extract_insn_cached (insn
);
23516 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23517 if (MEM_P (recog_data
.operand
[i
]))
23519 constrain_operands_cached (reload_completed
);
23520 if (which_alternative
!= -1)
23522 const char *constraints
= recog_data
.constraints
[i
];
23523 int alt
= which_alternative
;
23525 while (*constraints
== '=' || *constraints
== '+')
23528 while (*constraints
++ != ',')
23530 /* Skip ignored operands. */
23531 if (*constraints
== 'X')
23534 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
23539 /* Compute default value for "length_vex" attribute. It includes
23540 2 or 3 byte VEX prefix and 1 opcode byte. */
23543 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
23547 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
23548 byte VEX prefix. */
23549 if (!has_0f_opcode
|| has_vex_w
)
23552 /* We can always use 2 byte VEX prefix in 32bit. */
23556 extract_insn_cached (insn
);
23558 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23559 if (REG_P (recog_data
.operand
[i
]))
23561 /* REX.W bit uses 3 byte VEX prefix. */
23562 if (GET_MODE (recog_data
.operand
[i
]) == DImode
23563 && GENERAL_REG_P (recog_data
.operand
[i
]))
23568 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23569 if (MEM_P (recog_data
.operand
[i
])
23570 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
23577 /* Return the maximum number of instructions a cpu can issue. */
23580 ix86_issue_rate (void)
23584 case PROCESSOR_PENTIUM
:
23585 case PROCESSOR_ATOM
:
23589 case PROCESSOR_PENTIUMPRO
:
23590 case PROCESSOR_PENTIUM4
:
23591 case PROCESSOR_CORE2_32
:
23592 case PROCESSOR_CORE2_64
:
23593 case PROCESSOR_COREI7_32
:
23594 case PROCESSOR_COREI7_64
:
23595 case PROCESSOR_ATHLON
:
23597 case PROCESSOR_AMDFAM10
:
23598 case PROCESSOR_NOCONA
:
23599 case PROCESSOR_GENERIC32
:
23600 case PROCESSOR_GENERIC64
:
23601 case PROCESSOR_BDVER1
:
23602 case PROCESSOR_BDVER2
:
23603 case PROCESSOR_BTVER1
:
23611 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23612 by DEP_INSN and nothing set by DEP_INSN. */
23615 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
23619 /* Simplify the test for uninteresting insns. */
23620 if (insn_type
!= TYPE_SETCC
23621 && insn_type
!= TYPE_ICMOV
23622 && insn_type
!= TYPE_FCMOV
23623 && insn_type
!= TYPE_IBR
)
23626 if ((set
= single_set (dep_insn
)) != 0)
23628 set
= SET_DEST (set
);
23631 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
23632 && XVECLEN (PATTERN (dep_insn
), 0) == 2
23633 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
23634 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
23636 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23637 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23642 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
23645 /* This test is true if the dependent insn reads the flags but
23646 not any other potentially set register. */
23647 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
23650 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
23656 /* Return true iff USE_INSN has a memory address with operands set by
23660 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
23663 extract_insn_cached (use_insn
);
23664 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23665 if (MEM_P (recog_data
.operand
[i
]))
23667 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
23668 return modified_in_p (addr
, set_insn
) != 0;
23674 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
23676 enum attr_type insn_type
, dep_insn_type
;
23677 enum attr_memory memory
;
23679 int dep_insn_code_number
;
23681 /* Anti and output dependencies have zero cost on all CPUs. */
23682 if (REG_NOTE_KIND (link
) != 0)
23685 dep_insn_code_number
= recog_memoized (dep_insn
);
23687 /* If we can't recognize the insns, we can't really do anything. */
23688 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
23691 insn_type
= get_attr_type (insn
);
23692 dep_insn_type
= get_attr_type (dep_insn
);
23696 case PROCESSOR_PENTIUM
:
23697 /* Address Generation Interlock adds a cycle of latency. */
23698 if (insn_type
== TYPE_LEA
)
23700 rtx addr
= PATTERN (insn
);
23702 if (GET_CODE (addr
) == PARALLEL
)
23703 addr
= XVECEXP (addr
, 0, 0);
23705 gcc_assert (GET_CODE (addr
) == SET
);
23707 addr
= SET_SRC (addr
);
23708 if (modified_in_p (addr
, dep_insn
))
23711 else if (ix86_agi_dependent (dep_insn
, insn
))
23714 /* ??? Compares pair with jump/setcc. */
23715 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
23718 /* Floating point stores require value to be ready one cycle earlier. */
23719 if (insn_type
== TYPE_FMOV
23720 && get_attr_memory (insn
) == MEMORY_STORE
23721 && !ix86_agi_dependent (dep_insn
, insn
))
23725 case PROCESSOR_PENTIUMPRO
:
23726 memory
= get_attr_memory (insn
);
23728 /* INT->FP conversion is expensive. */
23729 if (get_attr_fp_int_src (dep_insn
))
23732 /* There is one cycle extra latency between an FP op and a store. */
23733 if (insn_type
== TYPE_FMOV
23734 && (set
= single_set (dep_insn
)) != NULL_RTX
23735 && (set2
= single_set (insn
)) != NULL_RTX
23736 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
23737 && MEM_P (SET_DEST (set2
)))
23740 /* Show ability of reorder buffer to hide latency of load by executing
23741 in parallel with previous instruction in case
23742 previous instruction is not needed to compute the address. */
23743 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23744 && !ix86_agi_dependent (dep_insn
, insn
))
23746 /* Claim moves to take one cycle, as core can issue one load
23747 at time and the next load can start cycle later. */
23748 if (dep_insn_type
== TYPE_IMOV
23749 || dep_insn_type
== TYPE_FMOV
)
23757 memory
= get_attr_memory (insn
);
23759 /* The esp dependency is resolved before the instruction is really
23761 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
23762 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
23765 /* INT->FP conversion is expensive. */
23766 if (get_attr_fp_int_src (dep_insn
))
23769 /* Show ability of reorder buffer to hide latency of load by executing
23770 in parallel with previous instruction in case
23771 previous instruction is not needed to compute the address. */
23772 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23773 && !ix86_agi_dependent (dep_insn
, insn
))
23775 /* Claim moves to take one cycle, as core can issue one load
23776 at time and the next load can start cycle later. */
23777 if (dep_insn_type
== TYPE_IMOV
23778 || dep_insn_type
== TYPE_FMOV
)
23787 case PROCESSOR_ATHLON
:
23789 case PROCESSOR_AMDFAM10
:
23790 case PROCESSOR_BDVER1
:
23791 case PROCESSOR_BDVER2
:
23792 case PROCESSOR_BTVER1
:
23793 case PROCESSOR_ATOM
:
23794 case PROCESSOR_GENERIC32
:
23795 case PROCESSOR_GENERIC64
:
23796 memory
= get_attr_memory (insn
);
23798 /* Show ability of reorder buffer to hide latency of load by executing
23799 in parallel with previous instruction in case
23800 previous instruction is not needed to compute the address. */
23801 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23802 && !ix86_agi_dependent (dep_insn
, insn
))
23804 enum attr_unit unit
= get_attr_unit (insn
);
23807 /* Because of the difference between the length of integer and
23808 floating unit pipeline preparation stages, the memory operands
23809 for floating point are cheaper.
23811 ??? For Athlon it the difference is most probably 2. */
23812 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
23815 loadcost
= TARGET_ATHLON
? 2 : 0;
23817 if (cost
>= loadcost
)
23830 /* How many alternative schedules to try. This should be as wide as the
23831 scheduling freedom in the DFA, but no wider. Making this value too
23832 large results extra work for the scheduler. */
23835 ia32_multipass_dfa_lookahead (void)
23839 case PROCESSOR_PENTIUM
:
23842 case PROCESSOR_PENTIUMPRO
:
23846 case PROCESSOR_CORE2_32
:
23847 case PROCESSOR_CORE2_64
:
23848 case PROCESSOR_COREI7_32
:
23849 case PROCESSOR_COREI7_64
:
23850 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23851 as many instructions can be executed on a cycle, i.e.,
23852 issue_rate. I wonder why tuning for many CPUs does not do this. */
23853 return ix86_issue_rate ();
23860 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
23861 execution. It is applied if
23862 (1) IMUL instruction is on the top of list;
23863 (2) There exists the only producer of independent IMUL instruction in
23865 (3) Put found producer on the top of ready list.
23866 Returns issue rate. */
23869 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
23870 int clock_var ATTRIBUTE_UNUSED
)
23872 static int issue_rate
= -1;
23873 int n_ready
= *pn_ready
;
23874 rtx insn
, insn1
, insn2
;
23876 sd_iterator_def sd_it
;
23880 /* Set up issue rate. */
23881 issue_rate
= ix86_issue_rate();
23883 /* Do reodering for Atom only. */
23884 if (ix86_tune
!= PROCESSOR_ATOM
)
23886 /* Nothing to do if ready list contains only 1 instruction. */
23890 /* Check that IMUL instruction is on the top of ready list. */
23891 insn
= ready
[n_ready
- 1];
23892 if (!NONDEBUG_INSN_P (insn
))
23894 insn
= PATTERN (insn
);
23895 if (GET_CODE (insn
) == PARALLEL
)
23896 insn
= XVECEXP (insn
, 0, 0);
23897 if (GET_CODE (insn
) != SET
)
23899 if (!(GET_CODE (SET_SRC (insn
)) == MULT
23900 && GET_MODE (SET_SRC (insn
)) == SImode
))
23903 /* Search for producer of independent IMUL instruction. */
23904 for (i
= n_ready
- 2; i
>= 0; i
--)
23907 if (!NONDEBUG_INSN_P (insn
))
23909 /* Skip IMUL instruction. */
23910 insn2
= PATTERN (insn
);
23911 if (GET_CODE (insn2
) == PARALLEL
)
23912 insn2
= XVECEXP (insn2
, 0, 0);
23913 if (GET_CODE (insn2
) == SET
23914 && GET_CODE (SET_SRC (insn2
)) == MULT
23915 && GET_MODE (SET_SRC (insn2
)) == SImode
)
23918 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
23921 con
= DEP_CON (dep
);
23922 if (!NONDEBUG_INSN_P (con
))
23924 insn1
= PATTERN (con
);
23925 if (GET_CODE (insn1
) == PARALLEL
)
23926 insn1
= XVECEXP (insn1
, 0, 0);
23928 if (GET_CODE (insn1
) == SET
23929 && GET_CODE (SET_SRC (insn1
)) == MULT
23930 && GET_MODE (SET_SRC (insn1
)) == SImode
)
23932 sd_iterator_def sd_it1
;
23934 /* Check if there is no other dependee for IMUL. */
23936 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
23939 pro
= DEP_PRO (dep1
);
23940 if (!NONDEBUG_INSN_P (pro
))
23953 return issue_rate
; /* Didn't find IMUL producer. */
23955 if (sched_verbose
> 1)
23956 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
23957 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
23959 /* Put IMUL producer (ready[index]) at the top of ready list. */
23960 insn1
= ready
[index
];
23961 for (i
= index
; i
< n_ready
- 1; i
++)
23962 ready
[i
] = ready
[i
+ 1];
23963 ready
[n_ready
- 1] = insn1
;
23970 /* Model decoder of Core 2/i7.
23971 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
23972 track the instruction fetch block boundaries and make sure that long
23973 (9+ bytes) instructions are assigned to D0. */
23975 /* Maximum length of an insn that can be handled by
23976 a secondary decoder unit. '8' for Core 2/i7. */
23977 static int core2i7_secondary_decoder_max_insn_size
;
23979 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23980 '16' for Core 2/i7. */
23981 static int core2i7_ifetch_block_size
;
23983 /* Maximum number of instructions decoder can handle per cycle.
23984 '6' for Core 2/i7. */
23985 static int core2i7_ifetch_block_max_insns
;
23987 typedef struct ix86_first_cycle_multipass_data_
*
23988 ix86_first_cycle_multipass_data_t
;
23989 typedef const struct ix86_first_cycle_multipass_data_
*
23990 const_ix86_first_cycle_multipass_data_t
;
23992 /* A variable to store target state across calls to max_issue within
23994 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
23995 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
23997 /* Initialize DATA. */
23999 core2i7_first_cycle_multipass_init (void *_data
)
24001 ix86_first_cycle_multipass_data_t data
24002 = (ix86_first_cycle_multipass_data_t
) _data
;
24004 data
->ifetch_block_len
= 0;
24005 data
->ifetch_block_n_insns
= 0;
24006 data
->ready_try_change
= NULL
;
24007 data
->ready_try_change_size
= 0;
24010 /* Advancing the cycle; reset ifetch block counts. */
24012 core2i7_dfa_post_advance_cycle (void)
24014 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24016 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24018 data
->ifetch_block_len
= 0;
24019 data
->ifetch_block_n_insns
= 0;
24022 static int min_insn_size (rtx
);
24024 /* Filter out insns from ready_try that the core will not be able to issue
24025 on current cycle due to decoder. */
24027 core2i7_first_cycle_multipass_filter_ready_try
24028 (const_ix86_first_cycle_multipass_data_t data
,
24029 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24036 if (ready_try
[n_ready
])
24039 insn
= get_ready_element (n_ready
);
24040 insn_size
= min_insn_size (insn
);
24042 if (/* If this is a too long an insn for a secondary decoder ... */
24043 (!first_cycle_insn_p
24044 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24045 /* ... or it would not fit into the ifetch block ... */
24046 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24047 /* ... or the decoder is full already ... */
24048 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24049 /* ... mask the insn out. */
24051 ready_try
[n_ready
] = 1;
24053 if (data
->ready_try_change
)
24054 SET_BIT (data
->ready_try_change
, n_ready
);
24059 /* Prepare for a new round of multipass lookahead scheduling. */
24061 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24062 bool first_cycle_insn_p
)
24064 ix86_first_cycle_multipass_data_t data
24065 = (ix86_first_cycle_multipass_data_t
) _data
;
24066 const_ix86_first_cycle_multipass_data_t prev_data
24067 = ix86_first_cycle_multipass_data
;
24069 /* Restore the state from the end of the previous round. */
24070 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24071 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24073 /* Filter instructions that cannot be issued on current cycle due to
24074 decoder restrictions. */
24075 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24076 first_cycle_insn_p
);
24079 /* INSN is being issued in current solution. Account for its impact on
24080 the decoder model. */
24082 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24083 rtx insn
, const void *_prev_data
)
24085 ix86_first_cycle_multipass_data_t data
24086 = (ix86_first_cycle_multipass_data_t
) _data
;
24087 const_ix86_first_cycle_multipass_data_t prev_data
24088 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24090 int insn_size
= min_insn_size (insn
);
24092 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24093 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24094 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24095 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24097 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24098 if (!data
->ready_try_change
)
24100 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24101 data
->ready_try_change_size
= n_ready
;
24103 else if (data
->ready_try_change_size
< n_ready
)
24105 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24107 data
->ready_try_change_size
= n_ready
;
24109 sbitmap_zero (data
->ready_try_change
);
24111 /* Filter out insns from ready_try that the core will not be able to issue
24112 on current cycle due to decoder. */
24113 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24117 /* Revert the effect on ready_try. */
24119 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24121 int n_ready ATTRIBUTE_UNUSED
)
24123 const_ix86_first_cycle_multipass_data_t data
24124 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24125 unsigned int i
= 0;
24126 sbitmap_iterator sbi
;
24128 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24129 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
24135 /* Save the result of multipass lookahead scheduling for the next round. */
24137 core2i7_first_cycle_multipass_end (const void *_data
)
24139 const_ix86_first_cycle_multipass_data_t data
24140 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24141 ix86_first_cycle_multipass_data_t next_data
24142 = ix86_first_cycle_multipass_data
;
24146 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24147 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24151 /* Deallocate target data. */
24153 core2i7_first_cycle_multipass_fini (void *_data
)
24155 ix86_first_cycle_multipass_data_t data
24156 = (ix86_first_cycle_multipass_data_t
) _data
;
24158 if (data
->ready_try_change
)
24160 sbitmap_free (data
->ready_try_change
);
24161 data
->ready_try_change
= NULL
;
24162 data
->ready_try_change_size
= 0;
24166 /* Prepare for scheduling pass. */
24168 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24169 int verbose ATTRIBUTE_UNUSED
,
24170 int max_uid ATTRIBUTE_UNUSED
)
24172 /* Install scheduling hooks for current CPU. Some of these hooks are used
24173 in time-critical parts of the scheduler, so we only set them up when
24174 they are actually used. */
24177 case PROCESSOR_CORE2_32
:
24178 case PROCESSOR_CORE2_64
:
24179 case PROCESSOR_COREI7_32
:
24180 case PROCESSOR_COREI7_64
:
24181 targetm
.sched
.dfa_post_advance_cycle
24182 = core2i7_dfa_post_advance_cycle
;
24183 targetm
.sched
.first_cycle_multipass_init
24184 = core2i7_first_cycle_multipass_init
;
24185 targetm
.sched
.first_cycle_multipass_begin
24186 = core2i7_first_cycle_multipass_begin
;
24187 targetm
.sched
.first_cycle_multipass_issue
24188 = core2i7_first_cycle_multipass_issue
;
24189 targetm
.sched
.first_cycle_multipass_backtrack
24190 = core2i7_first_cycle_multipass_backtrack
;
24191 targetm
.sched
.first_cycle_multipass_end
24192 = core2i7_first_cycle_multipass_end
;
24193 targetm
.sched
.first_cycle_multipass_fini
24194 = core2i7_first_cycle_multipass_fini
;
24196 /* Set decoder parameters. */
24197 core2i7_secondary_decoder_max_insn_size
= 8;
24198 core2i7_ifetch_block_size
= 16;
24199 core2i7_ifetch_block_max_insns
= 6;
24203 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24204 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24205 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24206 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24207 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24208 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24209 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24215 /* Compute the alignment given to a constant that is being placed in memory.
24216 EXP is the constant and ALIGN is the alignment that the object would
24218 The value of this function is used instead of that alignment to align
24222 ix86_constant_alignment (tree exp
, int align
)
24224 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24225 || TREE_CODE (exp
) == INTEGER_CST
)
24227 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24229 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24232 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24233 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24234 return BITS_PER_WORD
;
24239 /* Compute the alignment for a static variable.
24240 TYPE is the data type, and ALIGN is the alignment that
24241 the object would ordinarily have. The value of this function is used
24242 instead of that alignment to align the object. */
24245 ix86_data_alignment (tree type
, int align
)
24247 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24249 if (AGGREGATE_TYPE_P (type
)
24250 && TYPE_SIZE (type
)
24251 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24252 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24253 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24254 && align
< max_align
)
24257 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24258 to 16byte boundary. */
24261 if (AGGREGATE_TYPE_P (type
)
24262 && TYPE_SIZE (type
)
24263 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24264 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24265 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24269 if (TREE_CODE (type
) == ARRAY_TYPE
)
24271 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24273 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24276 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24279 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24281 if ((TYPE_MODE (type
) == XCmode
24282 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24285 else if ((TREE_CODE (type
) == RECORD_TYPE
24286 || TREE_CODE (type
) == UNION_TYPE
24287 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24288 && TYPE_FIELDS (type
))
24290 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24292 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24295 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24296 || TREE_CODE (type
) == INTEGER_TYPE
)
24298 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24300 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24307 /* Compute the alignment for a local variable or a stack slot. EXP is
24308 the data type or decl itself, MODE is the widest mode available and
24309 ALIGN is the alignment that the object would ordinarily have. The
24310 value of this macro is used instead of that alignment to align the
24314 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24315 unsigned int align
)
24319 if (exp
&& DECL_P (exp
))
24321 type
= TREE_TYPE (exp
);
24330 /* Don't do dynamic stack realignment for long long objects with
24331 -mpreferred-stack-boundary=2. */
24334 && ix86_preferred_stack_boundary
< 64
24335 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24336 && (!type
|| !TYPE_USER_ALIGN (type
))
24337 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24340 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24341 register in MODE. We will return the largest alignment of XF
24345 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
24346 align
= GET_MODE_ALIGNMENT (DFmode
);
24350 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24351 to 16byte boundary. Exact wording is:
24353 An array uses the same alignment as its elements, except that a local or
24354 global array variable of length at least 16 bytes or
24355 a C99 variable-length array variable always has alignment of at least 16 bytes.
24357 This was added to allow use of aligned SSE instructions at arrays. This
24358 rule is meant for static storage (where compiler can not do the analysis
24359 by itself). We follow it for automatic variables only when convenient.
24360 We fully control everything in the function compiled and functions from
24361 other unit can not rely on the alignment.
24363 Exclude va_list type. It is the common case of local array where
24364 we can not benefit from the alignment. */
24365 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
24368 if (AGGREGATE_TYPE_P (type
)
24369 && (va_list_type_node
== NULL_TREE
24370 || (TYPE_MAIN_VARIANT (type
)
24371 != TYPE_MAIN_VARIANT (va_list_type_node
)))
24372 && TYPE_SIZE (type
)
24373 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24374 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
24375 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24378 if (TREE_CODE (type
) == ARRAY_TYPE
)
24380 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24382 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24385 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24387 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24389 if ((TYPE_MODE (type
) == XCmode
24390 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24393 else if ((TREE_CODE (type
) == RECORD_TYPE
24394 || TREE_CODE (type
) == UNION_TYPE
24395 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24396 && TYPE_FIELDS (type
))
24398 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24400 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24403 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24404 || TREE_CODE (type
) == INTEGER_TYPE
)
24407 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24409 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24415 /* Compute the minimum required alignment for dynamic stack realignment
24416 purposes for a local variable, parameter or a stack slot. EXP is
24417 the data type or decl itself, MODE is its mode and ALIGN is the
24418 alignment that the object would ordinarily have. */
24421 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
24422 unsigned int align
)
24426 if (exp
&& DECL_P (exp
))
24428 type
= TREE_TYPE (exp
);
24437 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
24440 /* Don't do dynamic stack realignment for long long objects with
24441 -mpreferred-stack-boundary=2. */
24442 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24443 && (!type
|| !TYPE_USER_ALIGN (type
))
24444 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24450 /* Find a location for the static chain incoming to a nested function.
24451 This is a register, unless all free registers are used by arguments. */
24454 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
24458 if (!DECL_STATIC_CHAIN (fndecl
))
24463 /* We always use R10 in 64-bit mode. */
24471 /* By default in 32-bit mode we use ECX to pass the static chain. */
24474 fntype
= TREE_TYPE (fndecl
);
24475 ccvt
= ix86_get_callcvt (fntype
);
24476 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
24478 /* Fastcall functions use ecx/edx for arguments, which leaves
24479 us with EAX for the static chain.
24480 Thiscall functions use ecx for arguments, which also
24481 leaves us with EAX for the static chain. */
24484 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
24486 /* For regparm 3, we have no free call-clobbered registers in
24487 which to store the static chain. In order to implement this,
24488 we have the trampoline push the static chain to the stack.
24489 However, we can't push a value below the return address when
24490 we call the nested function directly, so we have to use an
24491 alternate entry point. For this we use ESI, and have the
24492 alternate entry point push ESI, so that things appear the
24493 same once we're executing the nested function. */
24496 if (fndecl
== current_function_decl
)
24497 ix86_static_chain_on_stack
= true;
24498 return gen_frame_mem (SImode
,
24499 plus_constant (Pmode
,
24500 arg_pointer_rtx
, -8));
24506 return gen_rtx_REG (Pmode
, regno
);
24509 /* Emit RTL insns to initialize the variable parts of a trampoline.
24510 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24511 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24512 to be passed to the target function. */
24515 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
24521 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
24527 /* Load the function address to r11. Try to load address using
24528 the shorter movl instead of movabs. We may want to support
24529 movq for kernel mode, but kernel does not use trampolines at
24530 the moment. FNADDR is a 32bit address and may not be in
24531 DImode when ptr_mode == SImode. Always use movl in this
24533 if (ptr_mode
== SImode
24534 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
24536 fnaddr
= copy_addr_to_reg (fnaddr
);
24538 mem
= adjust_address (m_tramp
, HImode
, offset
);
24539 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
24541 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
24542 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
24547 mem
= adjust_address (m_tramp
, HImode
, offset
);
24548 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
24550 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
24551 emit_move_insn (mem
, fnaddr
);
24555 /* Load static chain using movabs to r10. Use the shorter movl
24556 instead of movabs when ptr_mode == SImode. */
24557 if (ptr_mode
== SImode
)
24568 mem
= adjust_address (m_tramp
, HImode
, offset
);
24569 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
24571 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
24572 emit_move_insn (mem
, chain_value
);
24575 /* Jump to r11; the last (unused) byte is a nop, only there to
24576 pad the write out to a single 32-bit store. */
24577 mem
= adjust_address (m_tramp
, SImode
, offset
);
24578 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
24585 /* Depending on the static chain location, either load a register
24586 with a constant, or push the constant to the stack. All of the
24587 instructions are the same size. */
24588 chain
= ix86_static_chain (fndecl
, true);
24591 switch (REGNO (chain
))
24594 opcode
= 0xb8; break;
24596 opcode
= 0xb9; break;
24598 gcc_unreachable ();
24604 mem
= adjust_address (m_tramp
, QImode
, offset
);
24605 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
24607 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24608 emit_move_insn (mem
, chain_value
);
24611 mem
= adjust_address (m_tramp
, QImode
, offset
);
24612 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
24614 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24616 /* Compute offset from the end of the jmp to the target function.
24617 In the case in which the trampoline stores the static chain on
24618 the stack, we need to skip the first insn which pushes the
24619 (call-saved) register static chain; this push is 1 byte. */
24621 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
24622 plus_constant (Pmode
, XEXP (m_tramp
, 0),
24623 offset
- (MEM_P (chain
) ? 1 : 0)),
24624 NULL_RTX
, 1, OPTAB_DIRECT
);
24625 emit_move_insn (mem
, disp
);
24628 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
24630 #ifdef HAVE_ENABLE_EXECUTE_STACK
24631 #ifdef CHECK_EXECUTE_STACK_ENABLED
24632 if (CHECK_EXECUTE_STACK_ENABLED
)
24634 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
24635 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
24639 /* The following file contains several enumerations and data structures
24640 built from the definitions in i386-builtin-types.def. */
24642 #include "i386-builtin-types.inc"
24644 /* Table for the ix86 builtin non-function types. */
24645 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
24647 /* Retrieve an element from the above table, building some of
24648 the types lazily. */
24651 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
24653 unsigned int index
;
24656 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
24658 type
= ix86_builtin_type_tab
[(int) tcode
];
24662 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
24663 if (tcode
<= IX86_BT_LAST_VECT
)
24665 enum machine_mode mode
;
24667 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
24668 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
24669 mode
= ix86_builtin_type_vect_mode
[index
];
24671 type
= build_vector_type_for_mode (itype
, mode
);
24677 index
= tcode
- IX86_BT_LAST_VECT
- 1;
24678 if (tcode
<= IX86_BT_LAST_PTR
)
24679 quals
= TYPE_UNQUALIFIED
;
24681 quals
= TYPE_QUAL_CONST
;
24683 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
24684 if (quals
!= TYPE_UNQUALIFIED
)
24685 itype
= build_qualified_type (itype
, quals
);
24687 type
= build_pointer_type (itype
);
24690 ix86_builtin_type_tab
[(int) tcode
] = type
;
24694 /* Table for the ix86 builtin function types. */
24695 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
24697 /* Retrieve an element from the above table, building some of
24698 the types lazily. */
24701 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
24705 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
24707 type
= ix86_builtin_func_type_tab
[(int) tcode
];
24711 if (tcode
<= IX86_BT_LAST_FUNC
)
24713 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
24714 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
24715 tree rtype
, atype
, args
= void_list_node
;
24718 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
24719 for (i
= after
- 1; i
> start
; --i
)
24721 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
24722 args
= tree_cons (NULL
, atype
, args
);
24725 type
= build_function_type (rtype
, args
);
24729 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
24730 enum ix86_builtin_func_type icode
;
24732 icode
= ix86_builtin_func_alias_base
[index
];
24733 type
= ix86_get_builtin_func_type (icode
);
24736 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
24741 /* Codes for all the SSE/MMX builtins. */
24744 IX86_BUILTIN_ADDPS
,
24745 IX86_BUILTIN_ADDSS
,
24746 IX86_BUILTIN_DIVPS
,
24747 IX86_BUILTIN_DIVSS
,
24748 IX86_BUILTIN_MULPS
,
24749 IX86_BUILTIN_MULSS
,
24750 IX86_BUILTIN_SUBPS
,
24751 IX86_BUILTIN_SUBSS
,
24753 IX86_BUILTIN_CMPEQPS
,
24754 IX86_BUILTIN_CMPLTPS
,
24755 IX86_BUILTIN_CMPLEPS
,
24756 IX86_BUILTIN_CMPGTPS
,
24757 IX86_BUILTIN_CMPGEPS
,
24758 IX86_BUILTIN_CMPNEQPS
,
24759 IX86_BUILTIN_CMPNLTPS
,
24760 IX86_BUILTIN_CMPNLEPS
,
24761 IX86_BUILTIN_CMPNGTPS
,
24762 IX86_BUILTIN_CMPNGEPS
,
24763 IX86_BUILTIN_CMPORDPS
,
24764 IX86_BUILTIN_CMPUNORDPS
,
24765 IX86_BUILTIN_CMPEQSS
,
24766 IX86_BUILTIN_CMPLTSS
,
24767 IX86_BUILTIN_CMPLESS
,
24768 IX86_BUILTIN_CMPNEQSS
,
24769 IX86_BUILTIN_CMPNLTSS
,
24770 IX86_BUILTIN_CMPNLESS
,
24771 IX86_BUILTIN_CMPNGTSS
,
24772 IX86_BUILTIN_CMPNGESS
,
24773 IX86_BUILTIN_CMPORDSS
,
24774 IX86_BUILTIN_CMPUNORDSS
,
24776 IX86_BUILTIN_COMIEQSS
,
24777 IX86_BUILTIN_COMILTSS
,
24778 IX86_BUILTIN_COMILESS
,
24779 IX86_BUILTIN_COMIGTSS
,
24780 IX86_BUILTIN_COMIGESS
,
24781 IX86_BUILTIN_COMINEQSS
,
24782 IX86_BUILTIN_UCOMIEQSS
,
24783 IX86_BUILTIN_UCOMILTSS
,
24784 IX86_BUILTIN_UCOMILESS
,
24785 IX86_BUILTIN_UCOMIGTSS
,
24786 IX86_BUILTIN_UCOMIGESS
,
24787 IX86_BUILTIN_UCOMINEQSS
,
24789 IX86_BUILTIN_CVTPI2PS
,
24790 IX86_BUILTIN_CVTPS2PI
,
24791 IX86_BUILTIN_CVTSI2SS
,
24792 IX86_BUILTIN_CVTSI642SS
,
24793 IX86_BUILTIN_CVTSS2SI
,
24794 IX86_BUILTIN_CVTSS2SI64
,
24795 IX86_BUILTIN_CVTTPS2PI
,
24796 IX86_BUILTIN_CVTTSS2SI
,
24797 IX86_BUILTIN_CVTTSS2SI64
,
24799 IX86_BUILTIN_MAXPS
,
24800 IX86_BUILTIN_MAXSS
,
24801 IX86_BUILTIN_MINPS
,
24802 IX86_BUILTIN_MINSS
,
24804 IX86_BUILTIN_LOADUPS
,
24805 IX86_BUILTIN_STOREUPS
,
24806 IX86_BUILTIN_MOVSS
,
24808 IX86_BUILTIN_MOVHLPS
,
24809 IX86_BUILTIN_MOVLHPS
,
24810 IX86_BUILTIN_LOADHPS
,
24811 IX86_BUILTIN_LOADLPS
,
24812 IX86_BUILTIN_STOREHPS
,
24813 IX86_BUILTIN_STORELPS
,
24815 IX86_BUILTIN_MASKMOVQ
,
24816 IX86_BUILTIN_MOVMSKPS
,
24817 IX86_BUILTIN_PMOVMSKB
,
24819 IX86_BUILTIN_MOVNTPS
,
24820 IX86_BUILTIN_MOVNTQ
,
24822 IX86_BUILTIN_LOADDQU
,
24823 IX86_BUILTIN_STOREDQU
,
24825 IX86_BUILTIN_PACKSSWB
,
24826 IX86_BUILTIN_PACKSSDW
,
24827 IX86_BUILTIN_PACKUSWB
,
24829 IX86_BUILTIN_PADDB
,
24830 IX86_BUILTIN_PADDW
,
24831 IX86_BUILTIN_PADDD
,
24832 IX86_BUILTIN_PADDQ
,
24833 IX86_BUILTIN_PADDSB
,
24834 IX86_BUILTIN_PADDSW
,
24835 IX86_BUILTIN_PADDUSB
,
24836 IX86_BUILTIN_PADDUSW
,
24837 IX86_BUILTIN_PSUBB
,
24838 IX86_BUILTIN_PSUBW
,
24839 IX86_BUILTIN_PSUBD
,
24840 IX86_BUILTIN_PSUBQ
,
24841 IX86_BUILTIN_PSUBSB
,
24842 IX86_BUILTIN_PSUBSW
,
24843 IX86_BUILTIN_PSUBUSB
,
24844 IX86_BUILTIN_PSUBUSW
,
24847 IX86_BUILTIN_PANDN
,
24851 IX86_BUILTIN_PAVGB
,
24852 IX86_BUILTIN_PAVGW
,
24854 IX86_BUILTIN_PCMPEQB
,
24855 IX86_BUILTIN_PCMPEQW
,
24856 IX86_BUILTIN_PCMPEQD
,
24857 IX86_BUILTIN_PCMPGTB
,
24858 IX86_BUILTIN_PCMPGTW
,
24859 IX86_BUILTIN_PCMPGTD
,
24861 IX86_BUILTIN_PMADDWD
,
24863 IX86_BUILTIN_PMAXSW
,
24864 IX86_BUILTIN_PMAXUB
,
24865 IX86_BUILTIN_PMINSW
,
24866 IX86_BUILTIN_PMINUB
,
24868 IX86_BUILTIN_PMULHUW
,
24869 IX86_BUILTIN_PMULHW
,
24870 IX86_BUILTIN_PMULLW
,
24872 IX86_BUILTIN_PSADBW
,
24873 IX86_BUILTIN_PSHUFW
,
24875 IX86_BUILTIN_PSLLW
,
24876 IX86_BUILTIN_PSLLD
,
24877 IX86_BUILTIN_PSLLQ
,
24878 IX86_BUILTIN_PSRAW
,
24879 IX86_BUILTIN_PSRAD
,
24880 IX86_BUILTIN_PSRLW
,
24881 IX86_BUILTIN_PSRLD
,
24882 IX86_BUILTIN_PSRLQ
,
24883 IX86_BUILTIN_PSLLWI
,
24884 IX86_BUILTIN_PSLLDI
,
24885 IX86_BUILTIN_PSLLQI
,
24886 IX86_BUILTIN_PSRAWI
,
24887 IX86_BUILTIN_PSRADI
,
24888 IX86_BUILTIN_PSRLWI
,
24889 IX86_BUILTIN_PSRLDI
,
24890 IX86_BUILTIN_PSRLQI
,
24892 IX86_BUILTIN_PUNPCKHBW
,
24893 IX86_BUILTIN_PUNPCKHWD
,
24894 IX86_BUILTIN_PUNPCKHDQ
,
24895 IX86_BUILTIN_PUNPCKLBW
,
24896 IX86_BUILTIN_PUNPCKLWD
,
24897 IX86_BUILTIN_PUNPCKLDQ
,
24899 IX86_BUILTIN_SHUFPS
,
24901 IX86_BUILTIN_RCPPS
,
24902 IX86_BUILTIN_RCPSS
,
24903 IX86_BUILTIN_RSQRTPS
,
24904 IX86_BUILTIN_RSQRTPS_NR
,
24905 IX86_BUILTIN_RSQRTSS
,
24906 IX86_BUILTIN_RSQRTF
,
24907 IX86_BUILTIN_SQRTPS
,
24908 IX86_BUILTIN_SQRTPS_NR
,
24909 IX86_BUILTIN_SQRTSS
,
24911 IX86_BUILTIN_UNPCKHPS
,
24912 IX86_BUILTIN_UNPCKLPS
,
24914 IX86_BUILTIN_ANDPS
,
24915 IX86_BUILTIN_ANDNPS
,
24917 IX86_BUILTIN_XORPS
,
24920 IX86_BUILTIN_LDMXCSR
,
24921 IX86_BUILTIN_STMXCSR
,
24922 IX86_BUILTIN_SFENCE
,
24924 /* 3DNow! Original */
24925 IX86_BUILTIN_FEMMS
,
24926 IX86_BUILTIN_PAVGUSB
,
24927 IX86_BUILTIN_PF2ID
,
24928 IX86_BUILTIN_PFACC
,
24929 IX86_BUILTIN_PFADD
,
24930 IX86_BUILTIN_PFCMPEQ
,
24931 IX86_BUILTIN_PFCMPGE
,
24932 IX86_BUILTIN_PFCMPGT
,
24933 IX86_BUILTIN_PFMAX
,
24934 IX86_BUILTIN_PFMIN
,
24935 IX86_BUILTIN_PFMUL
,
24936 IX86_BUILTIN_PFRCP
,
24937 IX86_BUILTIN_PFRCPIT1
,
24938 IX86_BUILTIN_PFRCPIT2
,
24939 IX86_BUILTIN_PFRSQIT1
,
24940 IX86_BUILTIN_PFRSQRT
,
24941 IX86_BUILTIN_PFSUB
,
24942 IX86_BUILTIN_PFSUBR
,
24943 IX86_BUILTIN_PI2FD
,
24944 IX86_BUILTIN_PMULHRW
,
24946 /* 3DNow! Athlon Extensions */
24947 IX86_BUILTIN_PF2IW
,
24948 IX86_BUILTIN_PFNACC
,
24949 IX86_BUILTIN_PFPNACC
,
24950 IX86_BUILTIN_PI2FW
,
24951 IX86_BUILTIN_PSWAPDSI
,
24952 IX86_BUILTIN_PSWAPDSF
,
24955 IX86_BUILTIN_ADDPD
,
24956 IX86_BUILTIN_ADDSD
,
24957 IX86_BUILTIN_DIVPD
,
24958 IX86_BUILTIN_DIVSD
,
24959 IX86_BUILTIN_MULPD
,
24960 IX86_BUILTIN_MULSD
,
24961 IX86_BUILTIN_SUBPD
,
24962 IX86_BUILTIN_SUBSD
,
24964 IX86_BUILTIN_CMPEQPD
,
24965 IX86_BUILTIN_CMPLTPD
,
24966 IX86_BUILTIN_CMPLEPD
,
24967 IX86_BUILTIN_CMPGTPD
,
24968 IX86_BUILTIN_CMPGEPD
,
24969 IX86_BUILTIN_CMPNEQPD
,
24970 IX86_BUILTIN_CMPNLTPD
,
24971 IX86_BUILTIN_CMPNLEPD
,
24972 IX86_BUILTIN_CMPNGTPD
,
24973 IX86_BUILTIN_CMPNGEPD
,
24974 IX86_BUILTIN_CMPORDPD
,
24975 IX86_BUILTIN_CMPUNORDPD
,
24976 IX86_BUILTIN_CMPEQSD
,
24977 IX86_BUILTIN_CMPLTSD
,
24978 IX86_BUILTIN_CMPLESD
,
24979 IX86_BUILTIN_CMPNEQSD
,
24980 IX86_BUILTIN_CMPNLTSD
,
24981 IX86_BUILTIN_CMPNLESD
,
24982 IX86_BUILTIN_CMPORDSD
,
24983 IX86_BUILTIN_CMPUNORDSD
,
24985 IX86_BUILTIN_COMIEQSD
,
24986 IX86_BUILTIN_COMILTSD
,
24987 IX86_BUILTIN_COMILESD
,
24988 IX86_BUILTIN_COMIGTSD
,
24989 IX86_BUILTIN_COMIGESD
,
24990 IX86_BUILTIN_COMINEQSD
,
24991 IX86_BUILTIN_UCOMIEQSD
,
24992 IX86_BUILTIN_UCOMILTSD
,
24993 IX86_BUILTIN_UCOMILESD
,
24994 IX86_BUILTIN_UCOMIGTSD
,
24995 IX86_BUILTIN_UCOMIGESD
,
24996 IX86_BUILTIN_UCOMINEQSD
,
24998 IX86_BUILTIN_MAXPD
,
24999 IX86_BUILTIN_MAXSD
,
25000 IX86_BUILTIN_MINPD
,
25001 IX86_BUILTIN_MINSD
,
25003 IX86_BUILTIN_ANDPD
,
25004 IX86_BUILTIN_ANDNPD
,
25006 IX86_BUILTIN_XORPD
,
25008 IX86_BUILTIN_SQRTPD
,
25009 IX86_BUILTIN_SQRTSD
,
25011 IX86_BUILTIN_UNPCKHPD
,
25012 IX86_BUILTIN_UNPCKLPD
,
25014 IX86_BUILTIN_SHUFPD
,
25016 IX86_BUILTIN_LOADUPD
,
25017 IX86_BUILTIN_STOREUPD
,
25018 IX86_BUILTIN_MOVSD
,
25020 IX86_BUILTIN_LOADHPD
,
25021 IX86_BUILTIN_LOADLPD
,
25023 IX86_BUILTIN_CVTDQ2PD
,
25024 IX86_BUILTIN_CVTDQ2PS
,
25026 IX86_BUILTIN_CVTPD2DQ
,
25027 IX86_BUILTIN_CVTPD2PI
,
25028 IX86_BUILTIN_CVTPD2PS
,
25029 IX86_BUILTIN_CVTTPD2DQ
,
25030 IX86_BUILTIN_CVTTPD2PI
,
25032 IX86_BUILTIN_CVTPI2PD
,
25033 IX86_BUILTIN_CVTSI2SD
,
25034 IX86_BUILTIN_CVTSI642SD
,
25036 IX86_BUILTIN_CVTSD2SI
,
25037 IX86_BUILTIN_CVTSD2SI64
,
25038 IX86_BUILTIN_CVTSD2SS
,
25039 IX86_BUILTIN_CVTSS2SD
,
25040 IX86_BUILTIN_CVTTSD2SI
,
25041 IX86_BUILTIN_CVTTSD2SI64
,
25043 IX86_BUILTIN_CVTPS2DQ
,
25044 IX86_BUILTIN_CVTPS2PD
,
25045 IX86_BUILTIN_CVTTPS2DQ
,
25047 IX86_BUILTIN_MOVNTI
,
25048 IX86_BUILTIN_MOVNTI64
,
25049 IX86_BUILTIN_MOVNTPD
,
25050 IX86_BUILTIN_MOVNTDQ
,
25052 IX86_BUILTIN_MOVQ128
,
25055 IX86_BUILTIN_MASKMOVDQU
,
25056 IX86_BUILTIN_MOVMSKPD
,
25057 IX86_BUILTIN_PMOVMSKB128
,
25059 IX86_BUILTIN_PACKSSWB128
,
25060 IX86_BUILTIN_PACKSSDW128
,
25061 IX86_BUILTIN_PACKUSWB128
,
25063 IX86_BUILTIN_PADDB128
,
25064 IX86_BUILTIN_PADDW128
,
25065 IX86_BUILTIN_PADDD128
,
25066 IX86_BUILTIN_PADDQ128
,
25067 IX86_BUILTIN_PADDSB128
,
25068 IX86_BUILTIN_PADDSW128
,
25069 IX86_BUILTIN_PADDUSB128
,
25070 IX86_BUILTIN_PADDUSW128
,
25071 IX86_BUILTIN_PSUBB128
,
25072 IX86_BUILTIN_PSUBW128
,
25073 IX86_BUILTIN_PSUBD128
,
25074 IX86_BUILTIN_PSUBQ128
,
25075 IX86_BUILTIN_PSUBSB128
,
25076 IX86_BUILTIN_PSUBSW128
,
25077 IX86_BUILTIN_PSUBUSB128
,
25078 IX86_BUILTIN_PSUBUSW128
,
25080 IX86_BUILTIN_PAND128
,
25081 IX86_BUILTIN_PANDN128
,
25082 IX86_BUILTIN_POR128
,
25083 IX86_BUILTIN_PXOR128
,
25085 IX86_BUILTIN_PAVGB128
,
25086 IX86_BUILTIN_PAVGW128
,
25088 IX86_BUILTIN_PCMPEQB128
,
25089 IX86_BUILTIN_PCMPEQW128
,
25090 IX86_BUILTIN_PCMPEQD128
,
25091 IX86_BUILTIN_PCMPGTB128
,
25092 IX86_BUILTIN_PCMPGTW128
,
25093 IX86_BUILTIN_PCMPGTD128
,
25095 IX86_BUILTIN_PMADDWD128
,
25097 IX86_BUILTIN_PMAXSW128
,
25098 IX86_BUILTIN_PMAXUB128
,
25099 IX86_BUILTIN_PMINSW128
,
25100 IX86_BUILTIN_PMINUB128
,
25102 IX86_BUILTIN_PMULUDQ
,
25103 IX86_BUILTIN_PMULUDQ128
,
25104 IX86_BUILTIN_PMULHUW128
,
25105 IX86_BUILTIN_PMULHW128
,
25106 IX86_BUILTIN_PMULLW128
,
25108 IX86_BUILTIN_PSADBW128
,
25109 IX86_BUILTIN_PSHUFHW
,
25110 IX86_BUILTIN_PSHUFLW
,
25111 IX86_BUILTIN_PSHUFD
,
25113 IX86_BUILTIN_PSLLDQI128
,
25114 IX86_BUILTIN_PSLLWI128
,
25115 IX86_BUILTIN_PSLLDI128
,
25116 IX86_BUILTIN_PSLLQI128
,
25117 IX86_BUILTIN_PSRAWI128
,
25118 IX86_BUILTIN_PSRADI128
,
25119 IX86_BUILTIN_PSRLDQI128
,
25120 IX86_BUILTIN_PSRLWI128
,
25121 IX86_BUILTIN_PSRLDI128
,
25122 IX86_BUILTIN_PSRLQI128
,
25124 IX86_BUILTIN_PSLLDQ128
,
25125 IX86_BUILTIN_PSLLW128
,
25126 IX86_BUILTIN_PSLLD128
,
25127 IX86_BUILTIN_PSLLQ128
,
25128 IX86_BUILTIN_PSRAW128
,
25129 IX86_BUILTIN_PSRAD128
,
25130 IX86_BUILTIN_PSRLW128
,
25131 IX86_BUILTIN_PSRLD128
,
25132 IX86_BUILTIN_PSRLQ128
,
25134 IX86_BUILTIN_PUNPCKHBW128
,
25135 IX86_BUILTIN_PUNPCKHWD128
,
25136 IX86_BUILTIN_PUNPCKHDQ128
,
25137 IX86_BUILTIN_PUNPCKHQDQ128
,
25138 IX86_BUILTIN_PUNPCKLBW128
,
25139 IX86_BUILTIN_PUNPCKLWD128
,
25140 IX86_BUILTIN_PUNPCKLDQ128
,
25141 IX86_BUILTIN_PUNPCKLQDQ128
,
25143 IX86_BUILTIN_CLFLUSH
,
25144 IX86_BUILTIN_MFENCE
,
25145 IX86_BUILTIN_LFENCE
,
25146 IX86_BUILTIN_PAUSE
,
25148 IX86_BUILTIN_BSRSI
,
25149 IX86_BUILTIN_BSRDI
,
25150 IX86_BUILTIN_RDPMC
,
25151 IX86_BUILTIN_RDTSC
,
25152 IX86_BUILTIN_RDTSCP
,
25153 IX86_BUILTIN_ROLQI
,
25154 IX86_BUILTIN_ROLHI
,
25155 IX86_BUILTIN_RORQI
,
25156 IX86_BUILTIN_RORHI
,
25159 IX86_BUILTIN_ADDSUBPS
,
25160 IX86_BUILTIN_HADDPS
,
25161 IX86_BUILTIN_HSUBPS
,
25162 IX86_BUILTIN_MOVSHDUP
,
25163 IX86_BUILTIN_MOVSLDUP
,
25164 IX86_BUILTIN_ADDSUBPD
,
25165 IX86_BUILTIN_HADDPD
,
25166 IX86_BUILTIN_HSUBPD
,
25167 IX86_BUILTIN_LDDQU
,
25169 IX86_BUILTIN_MONITOR
,
25170 IX86_BUILTIN_MWAIT
,
25173 IX86_BUILTIN_PHADDW
,
25174 IX86_BUILTIN_PHADDD
,
25175 IX86_BUILTIN_PHADDSW
,
25176 IX86_BUILTIN_PHSUBW
,
25177 IX86_BUILTIN_PHSUBD
,
25178 IX86_BUILTIN_PHSUBSW
,
25179 IX86_BUILTIN_PMADDUBSW
,
25180 IX86_BUILTIN_PMULHRSW
,
25181 IX86_BUILTIN_PSHUFB
,
25182 IX86_BUILTIN_PSIGNB
,
25183 IX86_BUILTIN_PSIGNW
,
25184 IX86_BUILTIN_PSIGND
,
25185 IX86_BUILTIN_PALIGNR
,
25186 IX86_BUILTIN_PABSB
,
25187 IX86_BUILTIN_PABSW
,
25188 IX86_BUILTIN_PABSD
,
25190 IX86_BUILTIN_PHADDW128
,
25191 IX86_BUILTIN_PHADDD128
,
25192 IX86_BUILTIN_PHADDSW128
,
25193 IX86_BUILTIN_PHSUBW128
,
25194 IX86_BUILTIN_PHSUBD128
,
25195 IX86_BUILTIN_PHSUBSW128
,
25196 IX86_BUILTIN_PMADDUBSW128
,
25197 IX86_BUILTIN_PMULHRSW128
,
25198 IX86_BUILTIN_PSHUFB128
,
25199 IX86_BUILTIN_PSIGNB128
,
25200 IX86_BUILTIN_PSIGNW128
,
25201 IX86_BUILTIN_PSIGND128
,
25202 IX86_BUILTIN_PALIGNR128
,
25203 IX86_BUILTIN_PABSB128
,
25204 IX86_BUILTIN_PABSW128
,
25205 IX86_BUILTIN_PABSD128
,
25207 /* AMDFAM10 - SSE4A New Instructions. */
25208 IX86_BUILTIN_MOVNTSD
,
25209 IX86_BUILTIN_MOVNTSS
,
25210 IX86_BUILTIN_EXTRQI
,
25211 IX86_BUILTIN_EXTRQ
,
25212 IX86_BUILTIN_INSERTQI
,
25213 IX86_BUILTIN_INSERTQ
,
25216 IX86_BUILTIN_BLENDPD
,
25217 IX86_BUILTIN_BLENDPS
,
25218 IX86_BUILTIN_BLENDVPD
,
25219 IX86_BUILTIN_BLENDVPS
,
25220 IX86_BUILTIN_PBLENDVB128
,
25221 IX86_BUILTIN_PBLENDW128
,
25226 IX86_BUILTIN_INSERTPS128
,
25228 IX86_BUILTIN_MOVNTDQA
,
25229 IX86_BUILTIN_MPSADBW128
,
25230 IX86_BUILTIN_PACKUSDW128
,
25231 IX86_BUILTIN_PCMPEQQ
,
25232 IX86_BUILTIN_PHMINPOSUW128
,
25234 IX86_BUILTIN_PMAXSB128
,
25235 IX86_BUILTIN_PMAXSD128
,
25236 IX86_BUILTIN_PMAXUD128
,
25237 IX86_BUILTIN_PMAXUW128
,
25239 IX86_BUILTIN_PMINSB128
,
25240 IX86_BUILTIN_PMINSD128
,
25241 IX86_BUILTIN_PMINUD128
,
25242 IX86_BUILTIN_PMINUW128
,
25244 IX86_BUILTIN_PMOVSXBW128
,
25245 IX86_BUILTIN_PMOVSXBD128
,
25246 IX86_BUILTIN_PMOVSXBQ128
,
25247 IX86_BUILTIN_PMOVSXWD128
,
25248 IX86_BUILTIN_PMOVSXWQ128
,
25249 IX86_BUILTIN_PMOVSXDQ128
,
25251 IX86_BUILTIN_PMOVZXBW128
,
25252 IX86_BUILTIN_PMOVZXBD128
,
25253 IX86_BUILTIN_PMOVZXBQ128
,
25254 IX86_BUILTIN_PMOVZXWD128
,
25255 IX86_BUILTIN_PMOVZXWQ128
,
25256 IX86_BUILTIN_PMOVZXDQ128
,
25258 IX86_BUILTIN_PMULDQ128
,
25259 IX86_BUILTIN_PMULLD128
,
25261 IX86_BUILTIN_ROUNDSD
,
25262 IX86_BUILTIN_ROUNDSS
,
25264 IX86_BUILTIN_ROUNDPD
,
25265 IX86_BUILTIN_ROUNDPS
,
25267 IX86_BUILTIN_FLOORPD
,
25268 IX86_BUILTIN_CEILPD
,
25269 IX86_BUILTIN_TRUNCPD
,
25270 IX86_BUILTIN_RINTPD
,
25271 IX86_BUILTIN_ROUNDPD_AZ
,
25273 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25274 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25275 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25277 IX86_BUILTIN_FLOORPS
,
25278 IX86_BUILTIN_CEILPS
,
25279 IX86_BUILTIN_TRUNCPS
,
25280 IX86_BUILTIN_RINTPS
,
25281 IX86_BUILTIN_ROUNDPS_AZ
,
25283 IX86_BUILTIN_FLOORPS_SFIX
,
25284 IX86_BUILTIN_CEILPS_SFIX
,
25285 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25287 IX86_BUILTIN_PTESTZ
,
25288 IX86_BUILTIN_PTESTC
,
25289 IX86_BUILTIN_PTESTNZC
,
25291 IX86_BUILTIN_VEC_INIT_V2SI
,
25292 IX86_BUILTIN_VEC_INIT_V4HI
,
25293 IX86_BUILTIN_VEC_INIT_V8QI
,
25294 IX86_BUILTIN_VEC_EXT_V2DF
,
25295 IX86_BUILTIN_VEC_EXT_V2DI
,
25296 IX86_BUILTIN_VEC_EXT_V4SF
,
25297 IX86_BUILTIN_VEC_EXT_V4SI
,
25298 IX86_BUILTIN_VEC_EXT_V8HI
,
25299 IX86_BUILTIN_VEC_EXT_V2SI
,
25300 IX86_BUILTIN_VEC_EXT_V4HI
,
25301 IX86_BUILTIN_VEC_EXT_V16QI
,
25302 IX86_BUILTIN_VEC_SET_V2DI
,
25303 IX86_BUILTIN_VEC_SET_V4SF
,
25304 IX86_BUILTIN_VEC_SET_V4SI
,
25305 IX86_BUILTIN_VEC_SET_V8HI
,
25306 IX86_BUILTIN_VEC_SET_V4HI
,
25307 IX86_BUILTIN_VEC_SET_V16QI
,
25309 IX86_BUILTIN_VEC_PACK_SFIX
,
25310 IX86_BUILTIN_VEC_PACK_SFIX256
,
25313 IX86_BUILTIN_CRC32QI
,
25314 IX86_BUILTIN_CRC32HI
,
25315 IX86_BUILTIN_CRC32SI
,
25316 IX86_BUILTIN_CRC32DI
,
25318 IX86_BUILTIN_PCMPESTRI128
,
25319 IX86_BUILTIN_PCMPESTRM128
,
25320 IX86_BUILTIN_PCMPESTRA128
,
25321 IX86_BUILTIN_PCMPESTRC128
,
25322 IX86_BUILTIN_PCMPESTRO128
,
25323 IX86_BUILTIN_PCMPESTRS128
,
25324 IX86_BUILTIN_PCMPESTRZ128
,
25325 IX86_BUILTIN_PCMPISTRI128
,
25326 IX86_BUILTIN_PCMPISTRM128
,
25327 IX86_BUILTIN_PCMPISTRA128
,
25328 IX86_BUILTIN_PCMPISTRC128
,
25329 IX86_BUILTIN_PCMPISTRO128
,
25330 IX86_BUILTIN_PCMPISTRS128
,
25331 IX86_BUILTIN_PCMPISTRZ128
,
25333 IX86_BUILTIN_PCMPGTQ
,
25335 /* AES instructions */
25336 IX86_BUILTIN_AESENC128
,
25337 IX86_BUILTIN_AESENCLAST128
,
25338 IX86_BUILTIN_AESDEC128
,
25339 IX86_BUILTIN_AESDECLAST128
,
25340 IX86_BUILTIN_AESIMC128
,
25341 IX86_BUILTIN_AESKEYGENASSIST128
,
25343 /* PCLMUL instruction */
25344 IX86_BUILTIN_PCLMULQDQ128
,
25347 IX86_BUILTIN_ADDPD256
,
25348 IX86_BUILTIN_ADDPS256
,
25349 IX86_BUILTIN_ADDSUBPD256
,
25350 IX86_BUILTIN_ADDSUBPS256
,
25351 IX86_BUILTIN_ANDPD256
,
25352 IX86_BUILTIN_ANDPS256
,
25353 IX86_BUILTIN_ANDNPD256
,
25354 IX86_BUILTIN_ANDNPS256
,
25355 IX86_BUILTIN_BLENDPD256
,
25356 IX86_BUILTIN_BLENDPS256
,
25357 IX86_BUILTIN_BLENDVPD256
,
25358 IX86_BUILTIN_BLENDVPS256
,
25359 IX86_BUILTIN_DIVPD256
,
25360 IX86_BUILTIN_DIVPS256
,
25361 IX86_BUILTIN_DPPS256
,
25362 IX86_BUILTIN_HADDPD256
,
25363 IX86_BUILTIN_HADDPS256
,
25364 IX86_BUILTIN_HSUBPD256
,
25365 IX86_BUILTIN_HSUBPS256
,
25366 IX86_BUILTIN_MAXPD256
,
25367 IX86_BUILTIN_MAXPS256
,
25368 IX86_BUILTIN_MINPD256
,
25369 IX86_BUILTIN_MINPS256
,
25370 IX86_BUILTIN_MULPD256
,
25371 IX86_BUILTIN_MULPS256
,
25372 IX86_BUILTIN_ORPD256
,
25373 IX86_BUILTIN_ORPS256
,
25374 IX86_BUILTIN_SHUFPD256
,
25375 IX86_BUILTIN_SHUFPS256
,
25376 IX86_BUILTIN_SUBPD256
,
25377 IX86_BUILTIN_SUBPS256
,
25378 IX86_BUILTIN_XORPD256
,
25379 IX86_BUILTIN_XORPS256
,
25380 IX86_BUILTIN_CMPSD
,
25381 IX86_BUILTIN_CMPSS
,
25382 IX86_BUILTIN_CMPPD
,
25383 IX86_BUILTIN_CMPPS
,
25384 IX86_BUILTIN_CMPPD256
,
25385 IX86_BUILTIN_CMPPS256
,
25386 IX86_BUILTIN_CVTDQ2PD256
,
25387 IX86_BUILTIN_CVTDQ2PS256
,
25388 IX86_BUILTIN_CVTPD2PS256
,
25389 IX86_BUILTIN_CVTPS2DQ256
,
25390 IX86_BUILTIN_CVTPS2PD256
,
25391 IX86_BUILTIN_CVTTPD2DQ256
,
25392 IX86_BUILTIN_CVTPD2DQ256
,
25393 IX86_BUILTIN_CVTTPS2DQ256
,
25394 IX86_BUILTIN_EXTRACTF128PD256
,
25395 IX86_BUILTIN_EXTRACTF128PS256
,
25396 IX86_BUILTIN_EXTRACTF128SI256
,
25397 IX86_BUILTIN_VZEROALL
,
25398 IX86_BUILTIN_VZEROUPPER
,
25399 IX86_BUILTIN_VPERMILVARPD
,
25400 IX86_BUILTIN_VPERMILVARPS
,
25401 IX86_BUILTIN_VPERMILVARPD256
,
25402 IX86_BUILTIN_VPERMILVARPS256
,
25403 IX86_BUILTIN_VPERMILPD
,
25404 IX86_BUILTIN_VPERMILPS
,
25405 IX86_BUILTIN_VPERMILPD256
,
25406 IX86_BUILTIN_VPERMILPS256
,
25407 IX86_BUILTIN_VPERMIL2PD
,
25408 IX86_BUILTIN_VPERMIL2PS
,
25409 IX86_BUILTIN_VPERMIL2PD256
,
25410 IX86_BUILTIN_VPERMIL2PS256
,
25411 IX86_BUILTIN_VPERM2F128PD256
,
25412 IX86_BUILTIN_VPERM2F128PS256
,
25413 IX86_BUILTIN_VPERM2F128SI256
,
25414 IX86_BUILTIN_VBROADCASTSS
,
25415 IX86_BUILTIN_VBROADCASTSD256
,
25416 IX86_BUILTIN_VBROADCASTSS256
,
25417 IX86_BUILTIN_VBROADCASTPD256
,
25418 IX86_BUILTIN_VBROADCASTPS256
,
25419 IX86_BUILTIN_VINSERTF128PD256
,
25420 IX86_BUILTIN_VINSERTF128PS256
,
25421 IX86_BUILTIN_VINSERTF128SI256
,
25422 IX86_BUILTIN_LOADUPD256
,
25423 IX86_BUILTIN_LOADUPS256
,
25424 IX86_BUILTIN_STOREUPD256
,
25425 IX86_BUILTIN_STOREUPS256
,
25426 IX86_BUILTIN_LDDQU256
,
25427 IX86_BUILTIN_MOVNTDQ256
,
25428 IX86_BUILTIN_MOVNTPD256
,
25429 IX86_BUILTIN_MOVNTPS256
,
25430 IX86_BUILTIN_LOADDQU256
,
25431 IX86_BUILTIN_STOREDQU256
,
25432 IX86_BUILTIN_MASKLOADPD
,
25433 IX86_BUILTIN_MASKLOADPS
,
25434 IX86_BUILTIN_MASKSTOREPD
,
25435 IX86_BUILTIN_MASKSTOREPS
,
25436 IX86_BUILTIN_MASKLOADPD256
,
25437 IX86_BUILTIN_MASKLOADPS256
,
25438 IX86_BUILTIN_MASKSTOREPD256
,
25439 IX86_BUILTIN_MASKSTOREPS256
,
25440 IX86_BUILTIN_MOVSHDUP256
,
25441 IX86_BUILTIN_MOVSLDUP256
,
25442 IX86_BUILTIN_MOVDDUP256
,
25444 IX86_BUILTIN_SQRTPD256
,
25445 IX86_BUILTIN_SQRTPS256
,
25446 IX86_BUILTIN_SQRTPS_NR256
,
25447 IX86_BUILTIN_RSQRTPS256
,
25448 IX86_BUILTIN_RSQRTPS_NR256
,
25450 IX86_BUILTIN_RCPPS256
,
25452 IX86_BUILTIN_ROUNDPD256
,
25453 IX86_BUILTIN_ROUNDPS256
,
25455 IX86_BUILTIN_FLOORPD256
,
25456 IX86_BUILTIN_CEILPD256
,
25457 IX86_BUILTIN_TRUNCPD256
,
25458 IX86_BUILTIN_RINTPD256
,
25459 IX86_BUILTIN_ROUNDPD_AZ256
,
25461 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
25462 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
25463 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
25465 IX86_BUILTIN_FLOORPS256
,
25466 IX86_BUILTIN_CEILPS256
,
25467 IX86_BUILTIN_TRUNCPS256
,
25468 IX86_BUILTIN_RINTPS256
,
25469 IX86_BUILTIN_ROUNDPS_AZ256
,
25471 IX86_BUILTIN_FLOORPS_SFIX256
,
25472 IX86_BUILTIN_CEILPS_SFIX256
,
25473 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
25475 IX86_BUILTIN_UNPCKHPD256
,
25476 IX86_BUILTIN_UNPCKLPD256
,
25477 IX86_BUILTIN_UNPCKHPS256
,
25478 IX86_BUILTIN_UNPCKLPS256
,
25480 IX86_BUILTIN_SI256_SI
,
25481 IX86_BUILTIN_PS256_PS
,
25482 IX86_BUILTIN_PD256_PD
,
25483 IX86_BUILTIN_SI_SI256
,
25484 IX86_BUILTIN_PS_PS256
,
25485 IX86_BUILTIN_PD_PD256
,
25487 IX86_BUILTIN_VTESTZPD
,
25488 IX86_BUILTIN_VTESTCPD
,
25489 IX86_BUILTIN_VTESTNZCPD
,
25490 IX86_BUILTIN_VTESTZPS
,
25491 IX86_BUILTIN_VTESTCPS
,
25492 IX86_BUILTIN_VTESTNZCPS
,
25493 IX86_BUILTIN_VTESTZPD256
,
25494 IX86_BUILTIN_VTESTCPD256
,
25495 IX86_BUILTIN_VTESTNZCPD256
,
25496 IX86_BUILTIN_VTESTZPS256
,
25497 IX86_BUILTIN_VTESTCPS256
,
25498 IX86_BUILTIN_VTESTNZCPS256
,
25499 IX86_BUILTIN_PTESTZ256
,
25500 IX86_BUILTIN_PTESTC256
,
25501 IX86_BUILTIN_PTESTNZC256
,
25503 IX86_BUILTIN_MOVMSKPD256
,
25504 IX86_BUILTIN_MOVMSKPS256
,
25507 IX86_BUILTIN_MPSADBW256
,
25508 IX86_BUILTIN_PABSB256
,
25509 IX86_BUILTIN_PABSW256
,
25510 IX86_BUILTIN_PABSD256
,
25511 IX86_BUILTIN_PACKSSDW256
,
25512 IX86_BUILTIN_PACKSSWB256
,
25513 IX86_BUILTIN_PACKUSDW256
,
25514 IX86_BUILTIN_PACKUSWB256
,
25515 IX86_BUILTIN_PADDB256
,
25516 IX86_BUILTIN_PADDW256
,
25517 IX86_BUILTIN_PADDD256
,
25518 IX86_BUILTIN_PADDQ256
,
25519 IX86_BUILTIN_PADDSB256
,
25520 IX86_BUILTIN_PADDSW256
,
25521 IX86_BUILTIN_PADDUSB256
,
25522 IX86_BUILTIN_PADDUSW256
,
25523 IX86_BUILTIN_PALIGNR256
,
25524 IX86_BUILTIN_AND256I
,
25525 IX86_BUILTIN_ANDNOT256I
,
25526 IX86_BUILTIN_PAVGB256
,
25527 IX86_BUILTIN_PAVGW256
,
25528 IX86_BUILTIN_PBLENDVB256
,
25529 IX86_BUILTIN_PBLENDVW256
,
25530 IX86_BUILTIN_PCMPEQB256
,
25531 IX86_BUILTIN_PCMPEQW256
,
25532 IX86_BUILTIN_PCMPEQD256
,
25533 IX86_BUILTIN_PCMPEQQ256
,
25534 IX86_BUILTIN_PCMPGTB256
,
25535 IX86_BUILTIN_PCMPGTW256
,
25536 IX86_BUILTIN_PCMPGTD256
,
25537 IX86_BUILTIN_PCMPGTQ256
,
25538 IX86_BUILTIN_PHADDW256
,
25539 IX86_BUILTIN_PHADDD256
,
25540 IX86_BUILTIN_PHADDSW256
,
25541 IX86_BUILTIN_PHSUBW256
,
25542 IX86_BUILTIN_PHSUBD256
,
25543 IX86_BUILTIN_PHSUBSW256
,
25544 IX86_BUILTIN_PMADDUBSW256
,
25545 IX86_BUILTIN_PMADDWD256
,
25546 IX86_BUILTIN_PMAXSB256
,
25547 IX86_BUILTIN_PMAXSW256
,
25548 IX86_BUILTIN_PMAXSD256
,
25549 IX86_BUILTIN_PMAXUB256
,
25550 IX86_BUILTIN_PMAXUW256
,
25551 IX86_BUILTIN_PMAXUD256
,
25552 IX86_BUILTIN_PMINSB256
,
25553 IX86_BUILTIN_PMINSW256
,
25554 IX86_BUILTIN_PMINSD256
,
25555 IX86_BUILTIN_PMINUB256
,
25556 IX86_BUILTIN_PMINUW256
,
25557 IX86_BUILTIN_PMINUD256
,
25558 IX86_BUILTIN_PMOVMSKB256
,
25559 IX86_BUILTIN_PMOVSXBW256
,
25560 IX86_BUILTIN_PMOVSXBD256
,
25561 IX86_BUILTIN_PMOVSXBQ256
,
25562 IX86_BUILTIN_PMOVSXWD256
,
25563 IX86_BUILTIN_PMOVSXWQ256
,
25564 IX86_BUILTIN_PMOVSXDQ256
,
25565 IX86_BUILTIN_PMOVZXBW256
,
25566 IX86_BUILTIN_PMOVZXBD256
,
25567 IX86_BUILTIN_PMOVZXBQ256
,
25568 IX86_BUILTIN_PMOVZXWD256
,
25569 IX86_BUILTIN_PMOVZXWQ256
,
25570 IX86_BUILTIN_PMOVZXDQ256
,
25571 IX86_BUILTIN_PMULDQ256
,
25572 IX86_BUILTIN_PMULHRSW256
,
25573 IX86_BUILTIN_PMULHUW256
,
25574 IX86_BUILTIN_PMULHW256
,
25575 IX86_BUILTIN_PMULLW256
,
25576 IX86_BUILTIN_PMULLD256
,
25577 IX86_BUILTIN_PMULUDQ256
,
25578 IX86_BUILTIN_POR256
,
25579 IX86_BUILTIN_PSADBW256
,
25580 IX86_BUILTIN_PSHUFB256
,
25581 IX86_BUILTIN_PSHUFD256
,
25582 IX86_BUILTIN_PSHUFHW256
,
25583 IX86_BUILTIN_PSHUFLW256
,
25584 IX86_BUILTIN_PSIGNB256
,
25585 IX86_BUILTIN_PSIGNW256
,
25586 IX86_BUILTIN_PSIGND256
,
25587 IX86_BUILTIN_PSLLDQI256
,
25588 IX86_BUILTIN_PSLLWI256
,
25589 IX86_BUILTIN_PSLLW256
,
25590 IX86_BUILTIN_PSLLDI256
,
25591 IX86_BUILTIN_PSLLD256
,
25592 IX86_BUILTIN_PSLLQI256
,
25593 IX86_BUILTIN_PSLLQ256
,
25594 IX86_BUILTIN_PSRAWI256
,
25595 IX86_BUILTIN_PSRAW256
,
25596 IX86_BUILTIN_PSRADI256
,
25597 IX86_BUILTIN_PSRAD256
,
25598 IX86_BUILTIN_PSRLDQI256
,
25599 IX86_BUILTIN_PSRLWI256
,
25600 IX86_BUILTIN_PSRLW256
,
25601 IX86_BUILTIN_PSRLDI256
,
25602 IX86_BUILTIN_PSRLD256
,
25603 IX86_BUILTIN_PSRLQI256
,
25604 IX86_BUILTIN_PSRLQ256
,
25605 IX86_BUILTIN_PSUBB256
,
25606 IX86_BUILTIN_PSUBW256
,
25607 IX86_BUILTIN_PSUBD256
,
25608 IX86_BUILTIN_PSUBQ256
,
25609 IX86_BUILTIN_PSUBSB256
,
25610 IX86_BUILTIN_PSUBSW256
,
25611 IX86_BUILTIN_PSUBUSB256
,
25612 IX86_BUILTIN_PSUBUSW256
,
25613 IX86_BUILTIN_PUNPCKHBW256
,
25614 IX86_BUILTIN_PUNPCKHWD256
,
25615 IX86_BUILTIN_PUNPCKHDQ256
,
25616 IX86_BUILTIN_PUNPCKHQDQ256
,
25617 IX86_BUILTIN_PUNPCKLBW256
,
25618 IX86_BUILTIN_PUNPCKLWD256
,
25619 IX86_BUILTIN_PUNPCKLDQ256
,
25620 IX86_BUILTIN_PUNPCKLQDQ256
,
25621 IX86_BUILTIN_PXOR256
,
25622 IX86_BUILTIN_MOVNTDQA256
,
25623 IX86_BUILTIN_VBROADCASTSS_PS
,
25624 IX86_BUILTIN_VBROADCASTSS_PS256
,
25625 IX86_BUILTIN_VBROADCASTSD_PD256
,
25626 IX86_BUILTIN_VBROADCASTSI256
,
25627 IX86_BUILTIN_PBLENDD256
,
25628 IX86_BUILTIN_PBLENDD128
,
25629 IX86_BUILTIN_PBROADCASTB256
,
25630 IX86_BUILTIN_PBROADCASTW256
,
25631 IX86_BUILTIN_PBROADCASTD256
,
25632 IX86_BUILTIN_PBROADCASTQ256
,
25633 IX86_BUILTIN_PBROADCASTB128
,
25634 IX86_BUILTIN_PBROADCASTW128
,
25635 IX86_BUILTIN_PBROADCASTD128
,
25636 IX86_BUILTIN_PBROADCASTQ128
,
25637 IX86_BUILTIN_VPERMVARSI256
,
25638 IX86_BUILTIN_VPERMDF256
,
25639 IX86_BUILTIN_VPERMVARSF256
,
25640 IX86_BUILTIN_VPERMDI256
,
25641 IX86_BUILTIN_VPERMTI256
,
25642 IX86_BUILTIN_VEXTRACT128I256
,
25643 IX86_BUILTIN_VINSERT128I256
,
25644 IX86_BUILTIN_MASKLOADD
,
25645 IX86_BUILTIN_MASKLOADQ
,
25646 IX86_BUILTIN_MASKLOADD256
,
25647 IX86_BUILTIN_MASKLOADQ256
,
25648 IX86_BUILTIN_MASKSTORED
,
25649 IX86_BUILTIN_MASKSTOREQ
,
25650 IX86_BUILTIN_MASKSTORED256
,
25651 IX86_BUILTIN_MASKSTOREQ256
,
25652 IX86_BUILTIN_PSLLVV4DI
,
25653 IX86_BUILTIN_PSLLVV2DI
,
25654 IX86_BUILTIN_PSLLVV8SI
,
25655 IX86_BUILTIN_PSLLVV4SI
,
25656 IX86_BUILTIN_PSRAVV8SI
,
25657 IX86_BUILTIN_PSRAVV4SI
,
25658 IX86_BUILTIN_PSRLVV4DI
,
25659 IX86_BUILTIN_PSRLVV2DI
,
25660 IX86_BUILTIN_PSRLVV8SI
,
25661 IX86_BUILTIN_PSRLVV4SI
,
25663 IX86_BUILTIN_GATHERSIV2DF
,
25664 IX86_BUILTIN_GATHERSIV4DF
,
25665 IX86_BUILTIN_GATHERDIV2DF
,
25666 IX86_BUILTIN_GATHERDIV4DF
,
25667 IX86_BUILTIN_GATHERSIV4SF
,
25668 IX86_BUILTIN_GATHERSIV8SF
,
25669 IX86_BUILTIN_GATHERDIV4SF
,
25670 IX86_BUILTIN_GATHERDIV8SF
,
25671 IX86_BUILTIN_GATHERSIV2DI
,
25672 IX86_BUILTIN_GATHERSIV4DI
,
25673 IX86_BUILTIN_GATHERDIV2DI
,
25674 IX86_BUILTIN_GATHERDIV4DI
,
25675 IX86_BUILTIN_GATHERSIV4SI
,
25676 IX86_BUILTIN_GATHERSIV8SI
,
25677 IX86_BUILTIN_GATHERDIV4SI
,
25678 IX86_BUILTIN_GATHERDIV8SI
,
25680 /* Alternate 4 element gather for the vectorizer where
25681 all operands are 32-byte wide. */
25682 IX86_BUILTIN_GATHERALTSIV4DF
,
25683 IX86_BUILTIN_GATHERALTDIV8SF
,
25684 IX86_BUILTIN_GATHERALTSIV4DI
,
25685 IX86_BUILTIN_GATHERALTDIV8SI
,
25687 /* TFmode support builtins. */
25689 IX86_BUILTIN_HUGE_VALQ
,
25690 IX86_BUILTIN_FABSQ
,
25691 IX86_BUILTIN_COPYSIGNQ
,
25693 /* Vectorizer support builtins. */
25694 IX86_BUILTIN_CPYSGNPS
,
25695 IX86_BUILTIN_CPYSGNPD
,
25696 IX86_BUILTIN_CPYSGNPS256
,
25697 IX86_BUILTIN_CPYSGNPD256
,
25699 /* FMA4 instructions. */
25700 IX86_BUILTIN_VFMADDSS
,
25701 IX86_BUILTIN_VFMADDSD
,
25702 IX86_BUILTIN_VFMADDPS
,
25703 IX86_BUILTIN_VFMADDPD
,
25704 IX86_BUILTIN_VFMADDPS256
,
25705 IX86_BUILTIN_VFMADDPD256
,
25706 IX86_BUILTIN_VFMADDSUBPS
,
25707 IX86_BUILTIN_VFMADDSUBPD
,
25708 IX86_BUILTIN_VFMADDSUBPS256
,
25709 IX86_BUILTIN_VFMADDSUBPD256
,
25711 /* FMA3 instructions. */
25712 IX86_BUILTIN_VFMADDSS3
,
25713 IX86_BUILTIN_VFMADDSD3
,
25715 /* XOP instructions. */
25716 IX86_BUILTIN_VPCMOV
,
25717 IX86_BUILTIN_VPCMOV_V2DI
,
25718 IX86_BUILTIN_VPCMOV_V4SI
,
25719 IX86_BUILTIN_VPCMOV_V8HI
,
25720 IX86_BUILTIN_VPCMOV_V16QI
,
25721 IX86_BUILTIN_VPCMOV_V4SF
,
25722 IX86_BUILTIN_VPCMOV_V2DF
,
25723 IX86_BUILTIN_VPCMOV256
,
25724 IX86_BUILTIN_VPCMOV_V4DI256
,
25725 IX86_BUILTIN_VPCMOV_V8SI256
,
25726 IX86_BUILTIN_VPCMOV_V16HI256
,
25727 IX86_BUILTIN_VPCMOV_V32QI256
,
25728 IX86_BUILTIN_VPCMOV_V8SF256
,
25729 IX86_BUILTIN_VPCMOV_V4DF256
,
25731 IX86_BUILTIN_VPPERM
,
25733 IX86_BUILTIN_VPMACSSWW
,
25734 IX86_BUILTIN_VPMACSWW
,
25735 IX86_BUILTIN_VPMACSSWD
,
25736 IX86_BUILTIN_VPMACSWD
,
25737 IX86_BUILTIN_VPMACSSDD
,
25738 IX86_BUILTIN_VPMACSDD
,
25739 IX86_BUILTIN_VPMACSSDQL
,
25740 IX86_BUILTIN_VPMACSSDQH
,
25741 IX86_BUILTIN_VPMACSDQL
,
25742 IX86_BUILTIN_VPMACSDQH
,
25743 IX86_BUILTIN_VPMADCSSWD
,
25744 IX86_BUILTIN_VPMADCSWD
,
25746 IX86_BUILTIN_VPHADDBW
,
25747 IX86_BUILTIN_VPHADDBD
,
25748 IX86_BUILTIN_VPHADDBQ
,
25749 IX86_BUILTIN_VPHADDWD
,
25750 IX86_BUILTIN_VPHADDWQ
,
25751 IX86_BUILTIN_VPHADDDQ
,
25752 IX86_BUILTIN_VPHADDUBW
,
25753 IX86_BUILTIN_VPHADDUBD
,
25754 IX86_BUILTIN_VPHADDUBQ
,
25755 IX86_BUILTIN_VPHADDUWD
,
25756 IX86_BUILTIN_VPHADDUWQ
,
25757 IX86_BUILTIN_VPHADDUDQ
,
25758 IX86_BUILTIN_VPHSUBBW
,
25759 IX86_BUILTIN_VPHSUBWD
,
25760 IX86_BUILTIN_VPHSUBDQ
,
25762 IX86_BUILTIN_VPROTB
,
25763 IX86_BUILTIN_VPROTW
,
25764 IX86_BUILTIN_VPROTD
,
25765 IX86_BUILTIN_VPROTQ
,
25766 IX86_BUILTIN_VPROTB_IMM
,
25767 IX86_BUILTIN_VPROTW_IMM
,
25768 IX86_BUILTIN_VPROTD_IMM
,
25769 IX86_BUILTIN_VPROTQ_IMM
,
25771 IX86_BUILTIN_VPSHLB
,
25772 IX86_BUILTIN_VPSHLW
,
25773 IX86_BUILTIN_VPSHLD
,
25774 IX86_BUILTIN_VPSHLQ
,
25775 IX86_BUILTIN_VPSHAB
,
25776 IX86_BUILTIN_VPSHAW
,
25777 IX86_BUILTIN_VPSHAD
,
25778 IX86_BUILTIN_VPSHAQ
,
25780 IX86_BUILTIN_VFRCZSS
,
25781 IX86_BUILTIN_VFRCZSD
,
25782 IX86_BUILTIN_VFRCZPS
,
25783 IX86_BUILTIN_VFRCZPD
,
25784 IX86_BUILTIN_VFRCZPS256
,
25785 IX86_BUILTIN_VFRCZPD256
,
25787 IX86_BUILTIN_VPCOMEQUB
,
25788 IX86_BUILTIN_VPCOMNEUB
,
25789 IX86_BUILTIN_VPCOMLTUB
,
25790 IX86_BUILTIN_VPCOMLEUB
,
25791 IX86_BUILTIN_VPCOMGTUB
,
25792 IX86_BUILTIN_VPCOMGEUB
,
25793 IX86_BUILTIN_VPCOMFALSEUB
,
25794 IX86_BUILTIN_VPCOMTRUEUB
,
25796 IX86_BUILTIN_VPCOMEQUW
,
25797 IX86_BUILTIN_VPCOMNEUW
,
25798 IX86_BUILTIN_VPCOMLTUW
,
25799 IX86_BUILTIN_VPCOMLEUW
,
25800 IX86_BUILTIN_VPCOMGTUW
,
25801 IX86_BUILTIN_VPCOMGEUW
,
25802 IX86_BUILTIN_VPCOMFALSEUW
,
25803 IX86_BUILTIN_VPCOMTRUEUW
,
25805 IX86_BUILTIN_VPCOMEQUD
,
25806 IX86_BUILTIN_VPCOMNEUD
,
25807 IX86_BUILTIN_VPCOMLTUD
,
25808 IX86_BUILTIN_VPCOMLEUD
,
25809 IX86_BUILTIN_VPCOMGTUD
,
25810 IX86_BUILTIN_VPCOMGEUD
,
25811 IX86_BUILTIN_VPCOMFALSEUD
,
25812 IX86_BUILTIN_VPCOMTRUEUD
,
25814 IX86_BUILTIN_VPCOMEQUQ
,
25815 IX86_BUILTIN_VPCOMNEUQ
,
25816 IX86_BUILTIN_VPCOMLTUQ
,
25817 IX86_BUILTIN_VPCOMLEUQ
,
25818 IX86_BUILTIN_VPCOMGTUQ
,
25819 IX86_BUILTIN_VPCOMGEUQ
,
25820 IX86_BUILTIN_VPCOMFALSEUQ
,
25821 IX86_BUILTIN_VPCOMTRUEUQ
,
25823 IX86_BUILTIN_VPCOMEQB
,
25824 IX86_BUILTIN_VPCOMNEB
,
25825 IX86_BUILTIN_VPCOMLTB
,
25826 IX86_BUILTIN_VPCOMLEB
,
25827 IX86_BUILTIN_VPCOMGTB
,
25828 IX86_BUILTIN_VPCOMGEB
,
25829 IX86_BUILTIN_VPCOMFALSEB
,
25830 IX86_BUILTIN_VPCOMTRUEB
,
25832 IX86_BUILTIN_VPCOMEQW
,
25833 IX86_BUILTIN_VPCOMNEW
,
25834 IX86_BUILTIN_VPCOMLTW
,
25835 IX86_BUILTIN_VPCOMLEW
,
25836 IX86_BUILTIN_VPCOMGTW
,
25837 IX86_BUILTIN_VPCOMGEW
,
25838 IX86_BUILTIN_VPCOMFALSEW
,
25839 IX86_BUILTIN_VPCOMTRUEW
,
25841 IX86_BUILTIN_VPCOMEQD
,
25842 IX86_BUILTIN_VPCOMNED
,
25843 IX86_BUILTIN_VPCOMLTD
,
25844 IX86_BUILTIN_VPCOMLED
,
25845 IX86_BUILTIN_VPCOMGTD
,
25846 IX86_BUILTIN_VPCOMGED
,
25847 IX86_BUILTIN_VPCOMFALSED
,
25848 IX86_BUILTIN_VPCOMTRUED
,
25850 IX86_BUILTIN_VPCOMEQQ
,
25851 IX86_BUILTIN_VPCOMNEQ
,
25852 IX86_BUILTIN_VPCOMLTQ
,
25853 IX86_BUILTIN_VPCOMLEQ
,
25854 IX86_BUILTIN_VPCOMGTQ
,
25855 IX86_BUILTIN_VPCOMGEQ
,
25856 IX86_BUILTIN_VPCOMFALSEQ
,
25857 IX86_BUILTIN_VPCOMTRUEQ
,
25859 /* LWP instructions. */
25860 IX86_BUILTIN_LLWPCB
,
25861 IX86_BUILTIN_SLWPCB
,
25862 IX86_BUILTIN_LWPVAL32
,
25863 IX86_BUILTIN_LWPVAL64
,
25864 IX86_BUILTIN_LWPINS32
,
25865 IX86_BUILTIN_LWPINS64
,
25870 IX86_BUILTIN_XBEGIN
,
25872 IX86_BUILTIN_XABORT
,
25873 IX86_BUILTIN_XTEST
,
25875 /* BMI instructions. */
25876 IX86_BUILTIN_BEXTR32
,
25877 IX86_BUILTIN_BEXTR64
,
25880 /* TBM instructions. */
25881 IX86_BUILTIN_BEXTRI32
,
25882 IX86_BUILTIN_BEXTRI64
,
25884 /* BMI2 instructions. */
25885 IX86_BUILTIN_BZHI32
,
25886 IX86_BUILTIN_BZHI64
,
25887 IX86_BUILTIN_PDEP32
,
25888 IX86_BUILTIN_PDEP64
,
25889 IX86_BUILTIN_PEXT32
,
25890 IX86_BUILTIN_PEXT64
,
25892 /* FSGSBASE instructions. */
25893 IX86_BUILTIN_RDFSBASE32
,
25894 IX86_BUILTIN_RDFSBASE64
,
25895 IX86_BUILTIN_RDGSBASE32
,
25896 IX86_BUILTIN_RDGSBASE64
,
25897 IX86_BUILTIN_WRFSBASE32
,
25898 IX86_BUILTIN_WRFSBASE64
,
25899 IX86_BUILTIN_WRGSBASE32
,
25900 IX86_BUILTIN_WRGSBASE64
,
25902 /* RDRND instructions. */
25903 IX86_BUILTIN_RDRAND16_STEP
,
25904 IX86_BUILTIN_RDRAND32_STEP
,
25905 IX86_BUILTIN_RDRAND64_STEP
,
25907 /* F16C instructions. */
25908 IX86_BUILTIN_CVTPH2PS
,
25909 IX86_BUILTIN_CVTPH2PS256
,
25910 IX86_BUILTIN_CVTPS2PH
,
25911 IX86_BUILTIN_CVTPS2PH256
,
25913 /* CFString built-in for darwin */
25914 IX86_BUILTIN_CFSTRING
,
25916 /* Builtins to get CPU type and supported features. */
25917 IX86_BUILTIN_CPU_INIT
,
25918 IX86_BUILTIN_CPU_IS
,
25919 IX86_BUILTIN_CPU_SUPPORTS
,
25924 /* Table for the ix86 builtin decls. */
25925 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
25927 /* Table of all of the builtin functions that are possible with different ISA's
25928 but are waiting to be built until a function is declared to use that
25930 struct builtin_isa
{
25931 const char *name
; /* function name */
25932 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
25933 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
25934 bool const_p
; /* true if the declaration is constant */
25935 bool set_and_not_built_p
;
25938 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
25941 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
25942 of which isa_flags to use in the ix86_builtins_isa array. Stores the
25943 function decl in the ix86_builtins array. Returns the function decl or
25944 NULL_TREE, if the builtin was not added.
25946 If the front end has a special hook for builtin functions, delay adding
25947 builtin functions that aren't in the current ISA until the ISA is changed
25948 with function specific optimization. Doing so, can save about 300K for the
25949 default compiler. When the builtin is expanded, check at that time whether
25952 If the front end doesn't have a special hook, record all builtins, even if
25953 it isn't an instruction set in the current ISA in case the user uses
25954 function specific options for a different ISA, so that we don't get scope
25955 errors if a builtin is added in the middle of a function scope. */
25958 def_builtin (HOST_WIDE_INT mask
, const char *name
,
25959 enum ix86_builtin_func_type tcode
,
25960 enum ix86_builtins code
)
25962 tree decl
= NULL_TREE
;
25964 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
25966 ix86_builtins_isa
[(int) code
].isa
= mask
;
25968 mask
&= ~OPTION_MASK_ISA_64BIT
;
25970 || (mask
& ix86_isa_flags
) != 0
25971 || (lang_hooks
.builtin_function
25972 == lang_hooks
.builtin_function_ext_scope
))
25975 tree type
= ix86_get_builtin_func_type (tcode
);
25976 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
25978 ix86_builtins
[(int) code
] = decl
;
25979 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
25983 ix86_builtins
[(int) code
] = NULL_TREE
;
25984 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
25985 ix86_builtins_isa
[(int) code
].name
= name
;
25986 ix86_builtins_isa
[(int) code
].const_p
= false;
25987 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
25994 /* Like def_builtin, but also marks the function decl "const". */
25997 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
25998 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26000 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26002 TREE_READONLY (decl
) = 1;
26004 ix86_builtins_isa
[(int) code
].const_p
= true;
26009 /* Add any new builtin functions for a given ISA that may not have been
26010 declared. This saves a bit of space compared to adding all of the
26011 declarations to the tree, even if we didn't use them. */
26014 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26018 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26020 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26021 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26025 /* Don't define the builtin again. */
26026 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26028 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26029 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26030 type
, i
, BUILT_IN_MD
, NULL
,
26033 ix86_builtins
[i
] = decl
;
26034 if (ix86_builtins_isa
[i
].const_p
)
26035 TREE_READONLY (decl
) = 1;
26040 /* Bits for builtin_description.flag. */
26042 /* Set when we don't support the comparison natively, and should
26043 swap_comparison in order to support it. */
26044 #define BUILTIN_DESC_SWAP_OPERANDS 1
26046 struct builtin_description
26048 const HOST_WIDE_INT mask
;
26049 const enum insn_code icode
;
26050 const char *const name
;
26051 const enum ix86_builtins code
;
26052 const enum rtx_code comparison
;
26056 static const struct builtin_description bdesc_comi
[] =
26058 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26059 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26060 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26061 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26062 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26063 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26064 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26065 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26066 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26067 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26068 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26069 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26070 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26071 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26072 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26073 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26074 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26075 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26076 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26077 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26078 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26079 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26080 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26081 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26084 static const struct builtin_description bdesc_pcmpestr
[] =
26087 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26088 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26089 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26090 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26091 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26092 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26093 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26096 static const struct builtin_description bdesc_pcmpistr
[] =
26099 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26100 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26101 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26102 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26103 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26104 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26105 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26108 /* Special builtins with variable number of arguments. */
26109 static const struct builtin_description bdesc_special_args
[] =
26111 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtsc
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26112 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtscp
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26113 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26116 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26119 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26122 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26123 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26124 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26126 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26127 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26128 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26129 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26131 /* SSE or 3DNow!A */
26132 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26133 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26136 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26137 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26138 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26139 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26140 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26141 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26142 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26143 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26144 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26145 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26147 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26148 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26151 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26154 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26157 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26158 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26161 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26162 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26164 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26165 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26166 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26167 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26168 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26170 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26171 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26172 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26173 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26174 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26175 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26176 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26178 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26179 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26180 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26182 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26183 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26184 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26185 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26186 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26187 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26188 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26189 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26192 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26193 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26194 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26195 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26196 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26197 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26198 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26199 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26200 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26202 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26203 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26204 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26205 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26206 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26207 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26210 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26211 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26212 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26213 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26214 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26215 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26216 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26217 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26220 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26221 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26222 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26225 /* Builtins with variable number of arguments. */
26226 static const struct builtin_description bdesc_args
[] =
26228 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26229 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26230 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26231 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26232 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26233 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26234 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26237 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26238 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26239 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26240 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26241 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26242 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26244 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26245 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26246 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26247 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26248 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26249 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26250 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26251 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26253 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26254 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26256 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26257 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26258 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26259 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26261 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26262 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26263 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26264 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26265 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26266 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26268 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26269 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26270 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26271 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26272 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26273 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26275 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26276 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26277 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26279 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26281 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26282 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26283 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26284 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26285 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26286 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26288 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26289 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26290 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26291 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26292 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26293 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26295 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26296 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26297 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26298 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26301 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26302 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26303 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26304 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26306 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26307 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26308 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26309 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26310 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26311 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26312 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26313 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26314 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26315 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26316 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26317 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26318 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26319 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26320 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26323 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26324 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26325 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26326 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26327 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26328 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26331 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26332 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26333 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26334 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26335 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26336 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26337 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26338 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26339 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26340 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26341 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26342 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26344 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26346 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26347 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26348 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26349 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26350 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26351 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26352 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26353 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26355 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26356 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26357 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26358 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26359 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26360 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26361 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26362 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26363 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26364 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26365 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26366 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26367 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26368 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26369 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26370 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26371 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26372 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26373 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26374 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26375 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26376 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26378 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26379 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26380 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26381 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26383 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26384 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26385 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26386 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26388 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26390 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26391 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26392 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26393 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26394 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26396 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26397 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26398 { OPTION_MASK_ISA_SSE
 | OPTION_MASK_ISA_64BIT
 , CODE_FOR_sse_cvtsi2ssq
 , "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
 , UNKNOWN
 /* Cast to int like every other entry: the flag field of
    builtin_description is an int, not the ftype enum type.  */
 , (int) V4SF_FTYPE_V4SF_DI
 },
26400 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26402 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26403 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26404 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26406 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26407 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26409 /* SSE MMX or 3Dnow!A */
26410 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26411 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26412 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26414 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26415 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26416 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26417 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26419 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26420 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26422 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
26425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26428 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26429 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26434 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26435 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26437 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26439 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26441 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26442 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26443 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26444 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26446 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26447 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26450 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26455 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26456 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26459 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26460 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26461 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26462 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26463 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26464 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26465 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26466 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26467 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26468 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26469 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26470 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26471 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26472 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26473 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26474 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26475 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26476 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26477 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26478 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26480 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26481 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26482 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26483 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26486 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26490 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26492 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26496 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26501 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26502 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26504 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26507 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26509 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26510 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26514 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26516 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26517 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
26519 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26520 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26521 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26522 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26524 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26525 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26527 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26528 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26529 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26530 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26531 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26532 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26534 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26535 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26536 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26537 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26539 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26540 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26541 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26542 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26543 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26544 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26545 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26546 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26548 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26549 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26550 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26552 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26553 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
26555 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
26556 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26558 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
26560 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
26561 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
26562 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
26563 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
26565 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26566 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26567 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26568 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26569 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26570 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26571 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26573 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26574 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26575 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26576 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26577 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26578 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26579 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26581 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26582 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26583 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26584 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26586 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
26587 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26588 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26590 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
26592 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26595 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
26596 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
26599 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26600 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26602 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26603 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26604 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26605 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26606 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26607 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26610 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
26611 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
26612 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26613 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
26614 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
26615 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26617 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26618 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26619 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26620 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26621 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26622 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26623 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26624 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26625 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26626 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26627 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26628 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26629 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
26630 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
26631 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26632 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26633 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26634 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26635 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26636 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26637 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26638 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26639 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26640 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26643 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
26644 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
26647 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26648 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26649 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
26650 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
26651 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26652 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26653 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26654 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
26655 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
26656 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
26658 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26659 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26660 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26661 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26662 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26663 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26664 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26665 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26666 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26667 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26668 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26669 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26670 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26672 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26673 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26674 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26675 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26676 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26677 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26678 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26679 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26680 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26681 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26682 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26683 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26686 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
26687 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
26688 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26689 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26691 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26692 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
26693 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
26694 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26696 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
26697 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
26699 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26700 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26702 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26703 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
26704 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
26705 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26707 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
26708 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
26710 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26711 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26713 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26714 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26715 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26718 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26719 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
26720 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
26721 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26722 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26725 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
26726 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
26727 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
26728 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26731 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
26732 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26734 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26735 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26736 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26737 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26740 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
26743 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26744 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26745 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26746 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26747 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26748 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26749 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26750 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26751 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26752 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26753 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26754 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26755 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26756 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26757 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26758 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26759 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26760 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26761 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26762 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26763 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26764 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26765 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26766 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26767 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26768 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26770 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
26771 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
26772 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
26773 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
26775 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26776 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26777 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
26778 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
26779 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26780 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26781 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26782 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26783 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26784 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26785 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26786 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26787 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26788 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
26789 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
26790 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
26791 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
26792 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
26793 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
26794 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
26795 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
26796 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
26797 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
26798 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
26799 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26800 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26801 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
26802 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
26803 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
26804 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26805 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
26806 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
26807 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
26808 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
26810 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26811 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26812 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
26814 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
26815 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26816 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26817 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26818 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26820 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26822 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26823 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
26825 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
26826 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
26827 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
26828 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
26830 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
26831 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
26833 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
26834 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
26836 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
26837 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
26838 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
26839 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
26841 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
26842 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
26844 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26845 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
26847 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26848 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26849 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26850 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26852 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
26853 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
26854 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
26855 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
26856 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
26857 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
26859 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
26860 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
26861 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
26862 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
26863 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
26864 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
26865 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
26866 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
26867 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
26868 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
26869 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
26870 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
26871 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
26872 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
26873 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
26875 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
26876 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
26878 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26879 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26881 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
26884 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
26885 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
26886 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
26887 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
26888 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26889 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26890 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26891 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26892 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26893 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26894 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26895 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26896 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26897 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26898 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26899 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26900 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
26901 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26902 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26903 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26904 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26905 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
26906 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
26907 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26908 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26911 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26912 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26913 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26914 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26915 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26916 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26917 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26918 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26919 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26920 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26921 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
26922 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
26923 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26924 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26925 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26926 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26927 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26928 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26929 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26930 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26931 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26932 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26933 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26934 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26935 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
26936 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
26937 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
26938 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
26939 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
26940 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
26941 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
26942 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
26943 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
26944 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
26945 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
26946 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
26947 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
26948 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mulv4siv4di3
, "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
26949 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26950 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26951 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26952 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26953 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26954 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulv4siv4di3
, "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
26955 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26956 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
26957 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26958 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
26959 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
26960 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
26961 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26962 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26963 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26964 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
26965 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26966 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26967 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26968 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26969 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
26970 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
26971 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26972 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26973 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26974 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26975 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
26976 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26977 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26978 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26979 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26980 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
26981 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
26982 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26983 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26984 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26985 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26986 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26987 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26988 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26989 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26990 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26991 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26992 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26993 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26994 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26995 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26996 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26997 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26998 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26999 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27000 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27001 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27002 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27003 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27004 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27005 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27006 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27007 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27008 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27009 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27010 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27011 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27012 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27013 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27014 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27015 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27016 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27017 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27018 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27019 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27020 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27021 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27022 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27023 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27024 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27025 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27026 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27027 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27028 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27029 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27031 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27034 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27035 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27036 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27039 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27040 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27043 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27044 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27045 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27046 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27049 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27050 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27051 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27052 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27053 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27054 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */
/* Shorthand aliases mapping the MULTI_ARG_* names used in bdesc_multi_arg
   below onto the corresponding ix86_builtin_func_type enumerators.  The
   suffix encodes the argument count and element type; a trailing "2" means
   the 256-bit (doubled) vector variant.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27111 static const struct builtin_description bdesc_multi_arg
[] =
27113 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27114 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27115 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27116 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27117 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27118 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27120 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27121 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27122 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27123 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27124 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27125 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27127 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27128 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27129 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27130 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27131 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27132 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27133 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27134 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27135 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27136 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27137 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27138 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27140 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27141 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27142 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27143 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27144 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27145 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27146 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27147 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27148 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27149 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27150 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27151 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27153 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27154 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27155 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27156 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27157 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27158 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27159 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27161 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27162 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27163 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27164 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27165 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27166 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27167 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27169 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27171 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27172 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27173 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27174 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27175 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27176 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27177 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27178 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27179 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27180 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27181 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27182 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27184 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27185 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27186 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27187 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27188 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27189 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27190 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27191 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27192 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27193 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27194 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27195 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27196 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27197 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27198 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27199 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27201 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27202 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27203 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27204 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27205 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27206 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27208 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27209 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27210 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27211 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27212 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27213 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27214 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27215 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27216 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27217 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27218 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27219 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27220 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27221 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27222 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27224 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27225 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27226 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27227 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27228 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27229 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27230 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27232 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27233 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27234 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27235 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27236 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27237 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27238 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27240 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27241 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27242 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27243 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27244 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27245 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27246 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27248 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27249 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27250 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27251 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27252 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27253 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27254 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27256 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27257 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27258 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27259 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27260 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27261 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27262 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27264 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27265 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27266 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27267 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
27268 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
27269 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
27270 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
27272 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27273 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27274 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27275 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
27276 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
27277 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
27278 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
27280 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27281 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27282 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27283 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
27284 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
27285 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
27286 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
27288 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27289 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27290 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27291 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27292 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27293 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27294 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27295 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27297 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27298 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27299 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27300 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27301 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27302 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27303 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27304 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27306 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
27307 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
27308 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
27309 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
27313 /* TM vector builtins. */
27315 /* Reuse the existing x86-specific `struct builtin_description' cause
27316 we're lazy. Add casts to make them fit. */
27317 static const struct builtin_description bdesc_tm
[] =
27319 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27320 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27321 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27322 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27323 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27324 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27325 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27327 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27328 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27329 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27330 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27331 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27332 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27333 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27335 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27336 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27337 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27338 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27339 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27340 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27341 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27343 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27344 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27345 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27348 /* TM callbacks. */
27350 /* Return the builtin decl needed to load a vector of TYPE. */
27353 ix86_builtin_tm_load (tree type
)
27355 if (TREE_CODE (type
) == VECTOR_TYPE
)
27357 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27360 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
27362 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
27364 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
27370 /* Return the builtin decl needed to store a vector of TYPE. */
27373 ix86_builtin_tm_store (tree type
)
27375 if (TREE_CODE (type
) == VECTOR_TYPE
)
27377 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27380 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
27382 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
27384 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
27390 /* Initialize the transactional memory vector load/store builtins. */
27393 ix86_init_tm_builtins (void)
27395 enum ix86_builtin_func_type ftype
;
27396 const struct builtin_description
*d
;
27399 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
27400 tree attrs_log
, attrs_type_log
;
27405 /* If there are no builtins defined, we must be compiling in a
27406 language without trans-mem support. */
27407 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
27410 /* Use whatever attributes a normal TM load has. */
27411 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
27412 attrs_load
= DECL_ATTRIBUTES (decl
);
27413 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27414 /* Use whatever attributes a normal TM store has. */
27415 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
27416 attrs_store
= DECL_ATTRIBUTES (decl
);
27417 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27418 /* Use whatever attributes a normal TM log has. */
27419 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
27420 attrs_log
= DECL_ATTRIBUTES (decl
);
27421 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27423 for (i
= 0, d
= bdesc_tm
;
27424 i
< ARRAY_SIZE (bdesc_tm
);
27427 if ((d
->mask
& ix86_isa_flags
) != 0
27428 || (lang_hooks
.builtin_function
27429 == lang_hooks
.builtin_function_ext_scope
))
27431 tree type
, attrs
, attrs_type
;
27432 enum built_in_function code
= (enum built_in_function
) d
->code
;
27434 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27435 type
= ix86_get_builtin_func_type (ftype
);
27437 if (BUILTIN_TM_LOAD_P (code
))
27439 attrs
= attrs_load
;
27440 attrs_type
= attrs_type_load
;
27442 else if (BUILTIN_TM_STORE_P (code
))
27444 attrs
= attrs_store
;
27445 attrs_type
= attrs_type_store
;
27450 attrs_type
= attrs_type_log
;
27452 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
27453 /* The builtin without the prefix for
27454 calling it directly. */
27455 d
->name
+ strlen ("__builtin_"),
27457 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
27458 set the TYPE_ATTRIBUTES. */
27459 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
27461 set_builtin_decl (code
, decl
, false);
27466 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
27467 in the current target ISA to allow the user to compile particular modules
27468 with different target specific options that differ from the command line
27471 ix86_init_mmx_sse_builtins (void)
27473 const struct builtin_description
* d
;
27474 enum ix86_builtin_func_type ftype
;
27477 /* Add all special builtins with variable number of operands. */
27478 for (i
= 0, d
= bdesc_special_args
;
27479 i
< ARRAY_SIZE (bdesc_special_args
);
27485 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27486 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
27489 /* Add all builtins with variable number of operands. */
27490 for (i
= 0, d
= bdesc_args
;
27491 i
< ARRAY_SIZE (bdesc_args
);
27497 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27498 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27501 /* pcmpestr[im] insns. */
27502 for (i
= 0, d
= bdesc_pcmpestr
;
27503 i
< ARRAY_SIZE (bdesc_pcmpestr
);
27506 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
27507 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
27509 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
27510 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27513 /* pcmpistr[im] insns. */
27514 for (i
= 0, d
= bdesc_pcmpistr
;
27515 i
< ARRAY_SIZE (bdesc_pcmpistr
);
27518 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
27519 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
27521 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
27522 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27525 /* comi/ucomi insns. */
27526 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
27528 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
27529 ftype
= INT_FTYPE_V2DF_V2DF
;
27531 ftype
= INT_FTYPE_V4SF_V4SF
;
27532 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27536 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
27537 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
27538 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
27539 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
27541 /* SSE or 3DNow!A */
27542 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27543 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
27544 IX86_BUILTIN_MASKMOVQ
);
27547 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
27548 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
27550 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
27551 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
27552 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
27553 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
27556 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
27557 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
27558 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
27559 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
27562 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
27563 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
27564 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
27565 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
27566 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
27567 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
27568 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
27569 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
27570 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
27571 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
27572 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
27573 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
27576 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
27577 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
27580 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
27581 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
27582 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
27583 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
27584 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
27585 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
27586 IX86_BUILTIN_RDRAND64_STEP
);
27589 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
27590 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
27591 IX86_BUILTIN_GATHERSIV2DF
);
27593 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
27594 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
27595 IX86_BUILTIN_GATHERSIV4DF
);
27597 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
27598 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
27599 IX86_BUILTIN_GATHERDIV2DF
);
27601 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
27602 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
27603 IX86_BUILTIN_GATHERDIV4DF
);
27605 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
27606 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
27607 IX86_BUILTIN_GATHERSIV4SF
);
27609 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
27610 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
27611 IX86_BUILTIN_GATHERSIV8SF
);
27613 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
27614 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
27615 IX86_BUILTIN_GATHERDIV4SF
);
27617 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
27618 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
27619 IX86_BUILTIN_GATHERDIV8SF
);
27621 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
27622 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
27623 IX86_BUILTIN_GATHERSIV2DI
);
27625 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
27626 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
27627 IX86_BUILTIN_GATHERSIV4DI
);
27629 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
27630 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
27631 IX86_BUILTIN_GATHERDIV2DI
);
27633 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
27634 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
27635 IX86_BUILTIN_GATHERDIV4DI
);
27637 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
27638 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
27639 IX86_BUILTIN_GATHERSIV4SI
);
27641 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
27642 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
27643 IX86_BUILTIN_GATHERSIV8SI
);
27645 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
27646 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
27647 IX86_BUILTIN_GATHERDIV4SI
);
27649 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
27650 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
27651 IX86_BUILTIN_GATHERDIV8SI
);
27653 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
27654 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
27655 IX86_BUILTIN_GATHERALTSIV4DF
);
27657 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
27658 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
27659 IX86_BUILTIN_GATHERALTDIV8SF
);
27661 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
27662 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
27663 IX86_BUILTIN_GATHERALTSIV4DI
);
27665 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
27666 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
27667 IX86_BUILTIN_GATHERALTDIV8SI
);
27670 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
27671 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
27673 /* MMX access to the vec_init patterns. */
27674 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
27675 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
27677 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
27678 V4HI_FTYPE_HI_HI_HI_HI
,
27679 IX86_BUILTIN_VEC_INIT_V4HI
);
27681 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
27682 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
27683 IX86_BUILTIN_VEC_INIT_V8QI
);
27685 /* Access to the vec_extract patterns. */
27686 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
27687 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
27688 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
27689 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
27690 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
27691 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
27692 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
27693 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
27694 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
27695 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
27697 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27698 "__builtin_ia32_vec_ext_v4hi",
27699 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
27701 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
27702 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
27704 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
27705 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
27707 /* Access to the vec_set patterns. */
27708 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
27709 "__builtin_ia32_vec_set_v2di",
27710 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
27712 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
27713 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
27715 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
27716 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
27718 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
27719 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
27721 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27722 "__builtin_ia32_vec_set_v4hi",
27723 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
27725 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
27726 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
27728 /* Add FMA4 multi-arg argument instructions */
27729 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
27734 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27735 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27739 /* This builds the processor_model struct type defined in
27740 libgcc/config/i386/cpuinfo.c */
27743 build_processor_model_struct (void)
27745 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
27747 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
27749 tree type
= make_node (RECORD_TYPE
);
27751 /* The first 3 fields are unsigned int. */
27752 for (i
= 0; i
< 3; ++i
)
27754 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
27755 get_identifier (field_name
[i
]), unsigned_type_node
);
27756 if (field_chain
!= NULL_TREE
)
27757 DECL_CHAIN (field
) = field_chain
;
27758 field_chain
= field
;
27761 /* The last field is an array of unsigned integers of size one. */
27762 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
27763 get_identifier (field_name
[3]),
27764 build_array_type (unsigned_type_node
,
27765 build_index_type (size_one_node
)));
27766 if (field_chain
!= NULL_TREE
)
27767 DECL_CHAIN (field
) = field_chain
;
27768 field_chain
= field
;
27770 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
27774 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
27777 make_var_decl (tree type
, const char *name
)
27781 new_decl
= build_decl (UNKNOWN_LOCATION
,
27783 get_identifier(name
),
27786 DECL_EXTERNAL (new_decl
) = 1;
27787 TREE_STATIC (new_decl
) = 1;
27788 TREE_PUBLIC (new_decl
) = 1;
27789 DECL_INITIAL (new_decl
) = 0;
27790 DECL_ARTIFICIAL (new_decl
) = 0;
27791 DECL_PRESERVE_P (new_decl
) = 1;
27793 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
27794 assemble_variable (new_decl
, 0, 0, 0);
27799 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
27800 into an integer defined in libgcc/config/i386/cpuinfo.c */
27803 fold_builtin_cpu (tree fndecl
, tree
*args
)
27806 enum ix86_builtins fn_code
= (enum ix86_builtins
)
27807 DECL_FUNCTION_CODE (fndecl
);
27808 tree param_string_cst
= NULL
;
27810 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
27811 enum processor_features
27827 /* These are the values for vendor types and cpu types and subtypes
27828 in cpuinfo.c. Cpu types and subtypes should be subtracted by
27829 the corresponding start value. */
27830 enum processor_model
27840 M_CPU_SUBTYPE_START
,
27841 M_INTEL_COREI7_NEHALEM
,
27842 M_INTEL_COREI7_WESTMERE
,
27843 M_INTEL_COREI7_SANDYBRIDGE
,
27844 M_AMDFAM10H_BARCELONA
,
27845 M_AMDFAM10H_SHANGHAI
,
27846 M_AMDFAM10H_ISTANBUL
,
27847 M_AMDFAM15H_BDVER1
,
27851 static struct _arch_names_table
27853 const char *const name
;
27854 const enum processor_model model
;
27856 const arch_names_table
[] =
27859 {"intel", M_INTEL
},
27860 {"atom", M_INTEL_ATOM
},
27861 {"core2", M_INTEL_CORE2
},
27862 {"corei7", M_INTEL_COREI7
},
27863 {"nehalem", M_INTEL_COREI7_NEHALEM
},
27864 {"westmere", M_INTEL_COREI7_WESTMERE
},
27865 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
27866 {"amdfam10h", M_AMDFAM10H
},
27867 {"barcelona", M_AMDFAM10H_BARCELONA
},
27868 {"shanghai", M_AMDFAM10H_SHANGHAI
},
27869 {"istanbul", M_AMDFAM10H_ISTANBUL
},
27870 {"amdfam15h", M_AMDFAM15H
},
27871 {"bdver1", M_AMDFAM15H_BDVER1
},
27872 {"bdver2", M_AMDFAM15H_BDVER2
},
27875 static struct _isa_names_table
27877 const char *const name
;
27878 const enum processor_features feature
;
27880 const isa_names_table
[] =
27884 {"popcnt", F_POPCNT
},
27888 {"ssse3", F_SSSE3
},
27889 {"sse4.1", F_SSE4_1
},
27890 {"sse4.2", F_SSE4_2
},
27895 static tree __processor_model_type
= NULL_TREE
;
27896 static tree __cpu_model_var
= NULL_TREE
;
27898 if (__processor_model_type
== NULL_TREE
)
27899 __processor_model_type
= build_processor_model_struct ();
27901 if (__cpu_model_var
== NULL_TREE
)
27902 __cpu_model_var
= make_var_decl (__processor_model_type
,
27905 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
27907 param_string_cst
= *args
;
27908 while (param_string_cst
27909 && TREE_CODE (param_string_cst
) != STRING_CST
)
27911 /* *args must be a expr that can contain other EXPRS leading to a
27913 if (!EXPR_P (param_string_cst
))
27915 error ("Parameter to builtin must be a string constant or literal");
27916 return integer_zero_node
;
27918 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
27921 gcc_assert (param_string_cst
);
27923 if (fn_code
== IX86_BUILTIN_CPU_IS
)
27927 unsigned int field_val
= 0;
27928 unsigned int NUM_ARCH_NAMES
27929 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
27931 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
27932 if (strcmp (arch_names_table
[i
].name
,
27933 TREE_STRING_POINTER (param_string_cst
)) == 0)
27936 if (i
== NUM_ARCH_NAMES
)
27938 error ("Parameter to builtin not valid: %s",
27939 TREE_STRING_POINTER (param_string_cst
));
27940 return integer_zero_node
;
27943 field
= TYPE_FIELDS (__processor_model_type
);
27944 field_val
= arch_names_table
[i
].model
;
27946 /* CPU types are stored in the next field. */
27947 if (field_val
> M_CPU_TYPE_START
27948 && field_val
< M_CPU_SUBTYPE_START
)
27950 field
= DECL_CHAIN (field
);
27951 field_val
-= M_CPU_TYPE_START
;
27954 /* CPU subtypes are stored in the next field. */
27955 if (field_val
> M_CPU_SUBTYPE_START
)
27957 field
= DECL_CHAIN ( DECL_CHAIN (field
));
27958 field_val
-= M_CPU_SUBTYPE_START
;
27961 /* Get the appropriate field in __cpu_model. */
27962 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
27965 /* Check the value. */
27966 return build2 (EQ_EXPR
, unsigned_type_node
, ref
,
27967 build_int_cstu (unsigned_type_node
, field_val
));
27969 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
27974 unsigned int field_val
= 0;
27975 unsigned int NUM_ISA_NAMES
27976 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
27978 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
27979 if (strcmp (isa_names_table
[i
].name
,
27980 TREE_STRING_POINTER (param_string_cst
)) == 0)
27983 if (i
== NUM_ISA_NAMES
)
27985 error ("Parameter to builtin not valid: %s",
27986 TREE_STRING_POINTER (param_string_cst
));
27987 return integer_zero_node
;
27990 field
= TYPE_FIELDS (__processor_model_type
);
27991 /* Get the last field, which is __cpu_features. */
27992 while (DECL_CHAIN (field
))
27993 field
= DECL_CHAIN (field
);
27995 /* Get the appropriate field: __cpu_model.__cpu_features */
27996 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
27999 /* Access the 0th element of __cpu_features array. */
28000 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
28001 integer_zero_node
, NULL_TREE
, NULL_TREE
);
28003 field_val
= (1 << isa_names_table
[i
].feature
);
28004 /* Return __cpu_model.__cpu_features[0] & field_val */
28005 return build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
28006 build_int_cstu (unsigned_type_node
, field_val
));
28008 gcc_unreachable ();
28012 ix86_fold_builtin (tree fndecl
, int n_args
,
28013 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
28015 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
28017 enum ix86_builtins fn_code
= (enum ix86_builtins
)
28018 DECL_FUNCTION_CODE (fndecl
);
28019 if (fn_code
== IX86_BUILTIN_CPU_IS
28020 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
28022 gcc_assert (n_args
== 1);
28023 return fold_builtin_cpu (fndecl
, args
);
28030 /* Make builtins to detect cpu type and features supported. NAME is
28031 the builtin name, CODE is the builtin code, and FTYPE is the function
28032 type of the builtin. */
28035 make_cpu_type_builtin (const char* name
, int code
,
28036 enum ix86_builtin_func_type ftype
, bool is_const
)
28041 type
= ix86_get_builtin_func_type (ftype
);
28042 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
28044 gcc_assert (decl
!= NULL_TREE
);
28045 ix86_builtins
[(int) code
] = decl
;
28046 TREE_READONLY (decl
) = is_const
;
28049 /* Make builtins to get CPU type and features supported. The created
28052 __builtin_cpu_init (), to detect cpu type and features,
28053 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
28054 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
28058 ix86_init_platform_type_builtins (void)
28060 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
28061 INT_FTYPE_VOID
, false);
28062 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
28063 INT_FTYPE_PCCHAR
, true);
28064 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
28065 INT_FTYPE_PCCHAR
, true);
28068 /* Internal method for ix86_init_builtins. */
28071 ix86_init_builtins_va_builtins_abi (void)
28073 tree ms_va_ref
, sysv_va_ref
;
28074 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
28075 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
28076 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
28077 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
28081 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
28082 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
28083 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
28085 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
28088 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28089 fnvoid_va_start_ms
=
28090 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28091 fnvoid_va_end_sysv
=
28092 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
28093 fnvoid_va_start_sysv
=
28094 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
28096 fnvoid_va_copy_ms
=
28097 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
28099 fnvoid_va_copy_sysv
=
28100 build_function_type_list (void_type_node
, sysv_va_ref
,
28101 sysv_va_ref
, NULL_TREE
);
28103 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
28104 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28105 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
28106 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28107 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
28108 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28109 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
28110 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28111 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
28112 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28113 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
28114 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28118 ix86_init_builtin_types (void)
28120 tree float128_type_node
, float80_type_node
;
28122 /* The __float80 type. */
28123 float80_type_node
= long_double_type_node
;
28124 if (TYPE_MODE (float80_type_node
) != XFmode
)
28126 /* The __float80 type. */
28127 float80_type_node
= make_node (REAL_TYPE
);
28129 TYPE_PRECISION (float80_type_node
) = 80;
28130 layout_type (float80_type_node
);
28132 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
28134 /* The __float128 type. */
28135 float128_type_node
= make_node (REAL_TYPE
);
28136 TYPE_PRECISION (float128_type_node
) = 128;
28137 layout_type (float128_type_node
);
28138 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
28140 /* This macro is built by i386-builtin-types.awk. */
28141 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
28145 ix86_init_builtins (void)
28149 ix86_init_builtin_types ();
28151 /* Builtins to get CPU type and features. */
28152 ix86_init_platform_type_builtins ();
28154 /* TFmode support builtins. */
28155 def_builtin_const (0, "__builtin_infq",
28156 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
28157 def_builtin_const (0, "__builtin_huge_valq",
28158 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
28160 /* We will expand them to normal call if SSE isn't available since
28161 they are used by libgcc. */
28162 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
28163 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
28164 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
28165 TREE_READONLY (t
) = 1;
28166 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
28168 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
28169 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
28170 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
28171 TREE_READONLY (t
) = 1;
28172 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
28174 ix86_init_tm_builtins ();
28175 ix86_init_mmx_sse_builtins ();
28178 ix86_init_builtins_va_builtins_abi ();
28180 #ifdef SUBTARGET_INIT_BUILTINS
28181 SUBTARGET_INIT_BUILTINS
;
28185 /* Return the ix86 builtin for CODE. */
28188 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
28190 if (code
>= IX86_BUILTIN_MAX
)
28191 return error_mark_node
;
28193 return ix86_builtins
[code
];
28196 /* Errors in the source file can cause expand_expr to return const0_rtx
28197 where we expect a vector. To avoid crashing, use one of the vector
28198 clear instructions. */
28200 safe_vector_operand (rtx x
, enum machine_mode mode
)
28202 if (x
== const0_rtx
)
28203 x
= CONST0_RTX (mode
);
28207 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
28210 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
28213 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28214 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28215 rtx op0
= expand_normal (arg0
);
28216 rtx op1
= expand_normal (arg1
);
28217 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28218 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28219 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
28221 if (VECTOR_MODE_P (mode0
))
28222 op0
= safe_vector_operand (op0
, mode0
);
28223 if (VECTOR_MODE_P (mode1
))
28224 op1
= safe_vector_operand (op1
, mode1
);
28226 if (optimize
|| !target
28227 || GET_MODE (target
) != tmode
28228 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28229 target
= gen_reg_rtx (tmode
);
28231 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
28233 rtx x
= gen_reg_rtx (V4SImode
);
28234 emit_insn (gen_sse2_loadd (x
, op1
));
28235 op1
= gen_lowpart (TImode
, x
);
28238 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28239 op0
= copy_to_mode_reg (mode0
, op0
);
28240 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
28241 op1
= copy_to_mode_reg (mode1
, op1
);
28243 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28252 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
28255 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
28256 enum ix86_builtin_func_type m_type
,
28257 enum rtx_code sub_code
)
28262 bool comparison_p
= false;
28264 bool last_arg_constant
= false;
28265 int num_memory
= 0;
28268 enum machine_mode mode
;
28271 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28275 case MULTI_ARG_4_DF2_DI_I
:
28276 case MULTI_ARG_4_DF2_DI_I1
:
28277 case MULTI_ARG_4_SF2_SI_I
:
28278 case MULTI_ARG_4_SF2_SI_I1
:
28280 last_arg_constant
= true;
28283 case MULTI_ARG_3_SF
:
28284 case MULTI_ARG_3_DF
:
28285 case MULTI_ARG_3_SF2
:
28286 case MULTI_ARG_3_DF2
:
28287 case MULTI_ARG_3_DI
:
28288 case MULTI_ARG_3_SI
:
28289 case MULTI_ARG_3_SI_DI
:
28290 case MULTI_ARG_3_HI
:
28291 case MULTI_ARG_3_HI_SI
:
28292 case MULTI_ARG_3_QI
:
28293 case MULTI_ARG_3_DI2
:
28294 case MULTI_ARG_3_SI2
:
28295 case MULTI_ARG_3_HI2
:
28296 case MULTI_ARG_3_QI2
:
28300 case MULTI_ARG_2_SF
:
28301 case MULTI_ARG_2_DF
:
28302 case MULTI_ARG_2_DI
:
28303 case MULTI_ARG_2_SI
:
28304 case MULTI_ARG_2_HI
:
28305 case MULTI_ARG_2_QI
:
28309 case MULTI_ARG_2_DI_IMM
:
28310 case MULTI_ARG_2_SI_IMM
:
28311 case MULTI_ARG_2_HI_IMM
:
28312 case MULTI_ARG_2_QI_IMM
:
28314 last_arg_constant
= true;
28317 case MULTI_ARG_1_SF
:
28318 case MULTI_ARG_1_DF
:
28319 case MULTI_ARG_1_SF2
:
28320 case MULTI_ARG_1_DF2
:
28321 case MULTI_ARG_1_DI
:
28322 case MULTI_ARG_1_SI
:
28323 case MULTI_ARG_1_HI
:
28324 case MULTI_ARG_1_QI
:
28325 case MULTI_ARG_1_SI_DI
:
28326 case MULTI_ARG_1_HI_DI
:
28327 case MULTI_ARG_1_HI_SI
:
28328 case MULTI_ARG_1_QI_DI
:
28329 case MULTI_ARG_1_QI_SI
:
28330 case MULTI_ARG_1_QI_HI
:
28334 case MULTI_ARG_2_DI_CMP
:
28335 case MULTI_ARG_2_SI_CMP
:
28336 case MULTI_ARG_2_HI_CMP
:
28337 case MULTI_ARG_2_QI_CMP
:
28339 comparison_p
= true;
28342 case MULTI_ARG_2_SF_TF
:
28343 case MULTI_ARG_2_DF_TF
:
28344 case MULTI_ARG_2_DI_TF
:
28345 case MULTI_ARG_2_SI_TF
:
28346 case MULTI_ARG_2_HI_TF
:
28347 case MULTI_ARG_2_QI_TF
:
28353 gcc_unreachable ();
28356 if (optimize
|| !target
28357 || GET_MODE (target
) != tmode
28358 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28359 target
= gen_reg_rtx (tmode
);
28361 gcc_assert (nargs
<= 4);
28363 for (i
= 0; i
< nargs
; i
++)
28365 tree arg
= CALL_EXPR_ARG (exp
, i
);
28366 rtx op
= expand_normal (arg
);
28367 int adjust
= (comparison_p
) ? 1 : 0;
28368 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
28370 if (last_arg_constant
&& i
== nargs
- 1)
28372 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
28374 enum insn_code new_icode
= icode
;
28377 case CODE_FOR_xop_vpermil2v2df3
:
28378 case CODE_FOR_xop_vpermil2v4sf3
:
28379 case CODE_FOR_xop_vpermil2v4df3
:
28380 case CODE_FOR_xop_vpermil2v8sf3
:
28381 error ("the last argument must be a 2-bit immediate");
28382 return gen_reg_rtx (tmode
);
28383 case CODE_FOR_xop_rotlv2di3
:
28384 new_icode
= CODE_FOR_rotlv2di3
;
28386 case CODE_FOR_xop_rotlv4si3
:
28387 new_icode
= CODE_FOR_rotlv4si3
;
28389 case CODE_FOR_xop_rotlv8hi3
:
28390 new_icode
= CODE_FOR_rotlv8hi3
;
28392 case CODE_FOR_xop_rotlv16qi3
:
28393 new_icode
= CODE_FOR_rotlv16qi3
;
28395 if (CONST_INT_P (op
))
28397 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
28398 op
= GEN_INT (INTVAL (op
) & mask
);
28399 gcc_checking_assert
28400 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
28404 gcc_checking_assert
28406 && insn_data
[new_icode
].operand
[0].mode
== tmode
28407 && insn_data
[new_icode
].operand
[1].mode
== tmode
28408 && insn_data
[new_icode
].operand
[2].mode
== mode
28409 && insn_data
[new_icode
].operand
[0].predicate
28410 == insn_data
[icode
].operand
[0].predicate
28411 && insn_data
[new_icode
].operand
[1].predicate
28412 == insn_data
[icode
].operand
[1].predicate
);
28418 gcc_unreachable ();
28425 if (VECTOR_MODE_P (mode
))
28426 op
= safe_vector_operand (op
, mode
);
28428 /* If we aren't optimizing, only allow one memory operand to be
28430 if (memory_operand (op
, mode
))
28433 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
28436 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
28438 op
= force_reg (mode
, op
);
28442 args
[i
].mode
= mode
;
28448 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
28453 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
28454 GEN_INT ((int)sub_code
));
28455 else if (! comparison_p
)
28456 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
28459 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
28463 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
28468 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
28472 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
28476 gcc_unreachable ();
28486 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28487 insns with vec_merge. */
28490 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
28494 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28495 rtx op1
, op0
= expand_normal (arg0
);
28496 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28497 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28499 if (optimize
|| !target
28500 || GET_MODE (target
) != tmode
28501 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28502 target
= gen_reg_rtx (tmode
);
28504 if (VECTOR_MODE_P (mode0
))
28505 op0
= safe_vector_operand (op0
, mode0
);
28507 if ((optimize
&& !register_operand (op0
, mode0
))
28508 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28509 op0
= copy_to_mode_reg (mode0
, op0
);
28512 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
28513 op1
= copy_to_mode_reg (mode0
, op1
);
28515 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28522 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
28525 ix86_expand_sse_compare (const struct builtin_description
*d
,
28526 tree exp
, rtx target
, bool swap
)
28529 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28530 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28531 rtx op0
= expand_normal (arg0
);
28532 rtx op1
= expand_normal (arg1
);
28534 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28535 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28536 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28537 enum rtx_code comparison
= d
->comparison
;
28539 if (VECTOR_MODE_P (mode0
))
28540 op0
= safe_vector_operand (op0
, mode0
);
28541 if (VECTOR_MODE_P (mode1
))
28542 op1
= safe_vector_operand (op1
, mode1
);
28544 /* Swap operands if we have a comparison that isn't available in
28548 rtx tmp
= gen_reg_rtx (mode1
);
28549 emit_move_insn (tmp
, op1
);
28554 if (optimize
|| !target
28555 || GET_MODE (target
) != tmode
28556 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28557 target
= gen_reg_rtx (tmode
);
28559 if ((optimize
&& !register_operand (op0
, mode0
))
28560 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
28561 op0
= copy_to_mode_reg (mode0
, op0
);
28562 if ((optimize
&& !register_operand (op1
, mode1
))
28563 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
28564 op1
= copy_to_mode_reg (mode1
, op1
);
28566 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
28567 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28574 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
28577 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
28581 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28582 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28583 rtx op0
= expand_normal (arg0
);
28584 rtx op1
= expand_normal (arg1
);
28585 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28586 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28587 enum rtx_code comparison
= d
->comparison
;
28589 if (VECTOR_MODE_P (mode0
))
28590 op0
= safe_vector_operand (op0
, mode0
);
28591 if (VECTOR_MODE_P (mode1
))
28592 op1
= safe_vector_operand (op1
, mode1
);
28594 /* Swap operands if we have a comparison that isn't available in
28596 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
28603 target
= gen_reg_rtx (SImode
);
28604 emit_move_insn (target
, const0_rtx
);
28605 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28607 if ((optimize
&& !register_operand (op0
, mode0
))
28608 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28609 op0
= copy_to_mode_reg (mode0
, op0
);
28610 if ((optimize
&& !register_operand (op1
, mode1
))
28611 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28612 op1
= copy_to_mode_reg (mode1
, op1
);
28614 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28618 emit_insn (gen_rtx_SET (VOIDmode
,
28619 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28620 gen_rtx_fmt_ee (comparison
, QImode
,
28624 return SUBREG_REG (target
);
28627 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28630 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
28634 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28635 rtx op1
, op0
= expand_normal (arg0
);
28636 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28637 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28639 if (optimize
|| target
== 0
28640 || GET_MODE (target
) != tmode
28641 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28642 target
= gen_reg_rtx (tmode
);
28644 if (VECTOR_MODE_P (mode0
))
28645 op0
= safe_vector_operand (op0
, mode0
);
28647 if ((optimize
&& !register_operand (op0
, mode0
))
28648 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28649 op0
= copy_to_mode_reg (mode0
, op0
);
28651 op1
= GEN_INT (d
->comparison
);
28653 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
28661 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
28662 tree exp
, rtx target
)
28665 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28666 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28667 rtx op0
= expand_normal (arg0
);
28668 rtx op1
= expand_normal (arg1
);
28670 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28671 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28672 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28674 if (optimize
|| target
== 0
28675 || GET_MODE (target
) != tmode
28676 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28677 target
= gen_reg_rtx (tmode
);
28679 op0
= safe_vector_operand (op0
, mode0
);
28680 op1
= safe_vector_operand (op1
, mode1
);
28682 if ((optimize
&& !register_operand (op0
, mode0
))
28683 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28684 op0
= copy_to_mode_reg (mode0
, op0
);
28685 if ((optimize
&& !register_operand (op1
, mode1
))
28686 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28687 op1
= copy_to_mode_reg (mode1
, op1
);
28689 op2
= GEN_INT (d
->comparison
);
28691 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28698 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
28701 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
28705 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28706 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28707 rtx op0
= expand_normal (arg0
);
28708 rtx op1
= expand_normal (arg1
);
28709 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28710 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28711 enum rtx_code comparison
= d
->comparison
;
28713 if (VECTOR_MODE_P (mode0
))
28714 op0
= safe_vector_operand (op0
, mode0
);
28715 if (VECTOR_MODE_P (mode1
))
28716 op1
= safe_vector_operand (op1
, mode1
);
28718 target
= gen_reg_rtx (SImode
);
28719 emit_move_insn (target
, const0_rtx
);
28720 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28722 if ((optimize
&& !register_operand (op0
, mode0
))
28723 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28724 op0
= copy_to_mode_reg (mode0
, op0
);
28725 if ((optimize
&& !register_operand (op1
, mode1
))
28726 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28727 op1
= copy_to_mode_reg (mode1
, op1
);
28729 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28733 emit_insn (gen_rtx_SET (VOIDmode
,
28734 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28735 gen_rtx_fmt_ee (comparison
, QImode
,
28739 return SUBREG_REG (target
);
28742 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28745 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
28746 tree exp
, rtx target
)
28749 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28750 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28751 tree arg2
= CALL_EXPR_ARG (exp
, 2);
28752 tree arg3
= CALL_EXPR_ARG (exp
, 3);
28753 tree arg4
= CALL_EXPR_ARG (exp
, 4);
28754 rtx scratch0
, scratch1
;
28755 rtx op0
= expand_normal (arg0
);
28756 rtx op1
= expand_normal (arg1
);
28757 rtx op2
= expand_normal (arg2
);
28758 rtx op3
= expand_normal (arg3
);
28759 rtx op4
= expand_normal (arg4
);
28760 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
28762 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
28763 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
28764 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
28765 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
28766 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
28767 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
28768 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
28770 if (VECTOR_MODE_P (modev2
))
28771 op0
= safe_vector_operand (op0
, modev2
);
28772 if (VECTOR_MODE_P (modev4
))
28773 op2
= safe_vector_operand (op2
, modev4
);
28775 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
28776 op0
= copy_to_mode_reg (modev2
, op0
);
28777 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
28778 op1
= copy_to_mode_reg (modei3
, op1
);
28779 if ((optimize
&& !register_operand (op2
, modev4
))
28780 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
28781 op2
= copy_to_mode_reg (modev4
, op2
);
28782 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
28783 op3
= copy_to_mode_reg (modei5
, op3
);
28785 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
28787 error ("the fifth argument must be an 8-bit immediate");
28791 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
28793 if (optimize
|| !target
28794 || GET_MODE (target
) != tmode0
28795 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
28796 target
= gen_reg_rtx (tmode0
);
28798 scratch1
= gen_reg_rtx (tmode1
);
28800 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
28802 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28804 if (optimize
|| !target
28805 || GET_MODE (target
) != tmode1
28806 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
28807 target
= gen_reg_rtx (tmode1
);
28809 scratch0
= gen_reg_rtx (tmode0
);
28811 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
28815 gcc_assert (d
->flag
);
28817 scratch0
= gen_reg_rtx (tmode0
);
28818 scratch1
= gen_reg_rtx (tmode1
);
28820 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
28830 target
= gen_reg_rtx (SImode
);
28831 emit_move_insn (target
, const0_rtx
);
28832 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28835 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28836 gen_rtx_fmt_ee (EQ
, QImode
,
28837 gen_rtx_REG ((enum machine_mode
) d
->flag
,
28840 return SUBREG_REG (target
);
28847 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28850 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
28851 tree exp
, rtx target
)
28854 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28855 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28856 tree arg2
= CALL_EXPR_ARG (exp
, 2);
28857 rtx scratch0
, scratch1
;
28858 rtx op0
= expand_normal (arg0
);
28859 rtx op1
= expand_normal (arg1
);
28860 rtx op2
= expand_normal (arg2
);
28861 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
28863 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
28864 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
28865 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
28866 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
28867 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
28869 if (VECTOR_MODE_P (modev2
))
28870 op0
= safe_vector_operand (op0
, modev2
);
28871 if (VECTOR_MODE_P (modev3
))
28872 op1
= safe_vector_operand (op1
, modev3
);
28874 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
28875 op0
= copy_to_mode_reg (modev2
, op0
);
28876 if ((optimize
&& !register_operand (op1
, modev3
))
28877 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
28878 op1
= copy_to_mode_reg (modev3
, op1
);
28880 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
28882 error ("the third argument must be an 8-bit immediate");
28886 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
28888 if (optimize
|| !target
28889 || GET_MODE (target
) != tmode0
28890 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
28891 target
= gen_reg_rtx (tmode0
);
28893 scratch1
= gen_reg_rtx (tmode1
);
28895 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
28897 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28899 if (optimize
|| !target
28900 || GET_MODE (target
) != tmode1
28901 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
28902 target
= gen_reg_rtx (tmode1
);
28904 scratch0
= gen_reg_rtx (tmode0
);
28906 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
28910 gcc_assert (d
->flag
);
28912 scratch0
= gen_reg_rtx (tmode0
);
28913 scratch1
= gen_reg_rtx (tmode1
);
28915 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
28925 target
= gen_reg_rtx (SImode
);
28926 emit_move_insn (target
, const0_rtx
);
28927 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28930 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28931 gen_rtx_fmt_ee (EQ
, QImode
,
28932 gen_rtx_REG ((enum machine_mode
) d
->flag
,
28935 return SUBREG_REG (target
);
28941 /* Subroutine of ix86_expand_builtin to take care of insns with
28942 variable number of operands. */
28945 ix86_expand_args_builtin (const struct builtin_description
*d
,
28946 tree exp
, rtx target
)
28948 rtx pat
, real_target
;
28949 unsigned int i
, nargs
;
28950 unsigned int nargs_constant
= 0;
28951 int num_memory
= 0;
28955 enum machine_mode mode
;
28957 bool last_arg_count
= false;
28958 enum insn_code icode
= d
->icode
;
28959 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
28960 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
28961 enum machine_mode rmode
= VOIDmode
;
28963 enum rtx_code comparison
= d
->comparison
;
28965 switch ((enum ix86_builtin_func_type
) d
->flag
)
28967 case V2DF_FTYPE_V2DF_ROUND
:
28968 case V4DF_FTYPE_V4DF_ROUND
:
28969 case V4SF_FTYPE_V4SF_ROUND
:
28970 case V8SF_FTYPE_V8SF_ROUND
:
28971 case V4SI_FTYPE_V4SF_ROUND
:
28972 case V8SI_FTYPE_V8SF_ROUND
:
28973 return ix86_expand_sse_round (d
, exp
, target
);
28974 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
28975 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
28976 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
28977 case INT_FTYPE_V8SF_V8SF_PTEST
:
28978 case INT_FTYPE_V4DI_V4DI_PTEST
:
28979 case INT_FTYPE_V4DF_V4DF_PTEST
:
28980 case INT_FTYPE_V4SF_V4SF_PTEST
:
28981 case INT_FTYPE_V2DI_V2DI_PTEST
:
28982 case INT_FTYPE_V2DF_V2DF_PTEST
:
28983 return ix86_expand_sse_ptest (d
, exp
, target
);
28984 case FLOAT128_FTYPE_FLOAT128
:
28985 case FLOAT_FTYPE_FLOAT
:
28986 case INT_FTYPE_INT
:
28987 case UINT64_FTYPE_INT
:
28988 case UINT16_FTYPE_UINT16
:
28989 case INT64_FTYPE_INT64
:
28990 case INT64_FTYPE_V4SF
:
28991 case INT64_FTYPE_V2DF
:
28992 case INT_FTYPE_V16QI
:
28993 case INT_FTYPE_V8QI
:
28994 case INT_FTYPE_V8SF
:
28995 case INT_FTYPE_V4DF
:
28996 case INT_FTYPE_V4SF
:
28997 case INT_FTYPE_V2DF
:
28998 case INT_FTYPE_V32QI
:
28999 case V16QI_FTYPE_V16QI
:
29000 case V8SI_FTYPE_V8SF
:
29001 case V8SI_FTYPE_V4SI
:
29002 case V8HI_FTYPE_V8HI
:
29003 case V8HI_FTYPE_V16QI
:
29004 case V8QI_FTYPE_V8QI
:
29005 case V8SF_FTYPE_V8SF
:
29006 case V8SF_FTYPE_V8SI
:
29007 case V8SF_FTYPE_V4SF
:
29008 case V8SF_FTYPE_V8HI
:
29009 case V4SI_FTYPE_V4SI
:
29010 case V4SI_FTYPE_V16QI
:
29011 case V4SI_FTYPE_V4SF
:
29012 case V4SI_FTYPE_V8SI
:
29013 case V4SI_FTYPE_V8HI
:
29014 case V4SI_FTYPE_V4DF
:
29015 case V4SI_FTYPE_V2DF
:
29016 case V4HI_FTYPE_V4HI
:
29017 case V4DF_FTYPE_V4DF
:
29018 case V4DF_FTYPE_V4SI
:
29019 case V4DF_FTYPE_V4SF
:
29020 case V4DF_FTYPE_V2DF
:
29021 case V4SF_FTYPE_V4SF
:
29022 case V4SF_FTYPE_V4SI
:
29023 case V4SF_FTYPE_V8SF
:
29024 case V4SF_FTYPE_V4DF
:
29025 case V4SF_FTYPE_V8HI
:
29026 case V4SF_FTYPE_V2DF
:
29027 case V2DI_FTYPE_V2DI
:
29028 case V2DI_FTYPE_V16QI
:
29029 case V2DI_FTYPE_V8HI
:
29030 case V2DI_FTYPE_V4SI
:
29031 case V2DF_FTYPE_V2DF
:
29032 case V2DF_FTYPE_V4SI
:
29033 case V2DF_FTYPE_V4DF
:
29034 case V2DF_FTYPE_V4SF
:
29035 case V2DF_FTYPE_V2SI
:
29036 case V2SI_FTYPE_V2SI
:
29037 case V2SI_FTYPE_V4SF
:
29038 case V2SI_FTYPE_V2SF
:
29039 case V2SI_FTYPE_V2DF
:
29040 case V2SF_FTYPE_V2SF
:
29041 case V2SF_FTYPE_V2SI
:
29042 case V32QI_FTYPE_V32QI
:
29043 case V32QI_FTYPE_V16QI
:
29044 case V16HI_FTYPE_V16HI
:
29045 case V16HI_FTYPE_V8HI
:
29046 case V8SI_FTYPE_V8SI
:
29047 case V16HI_FTYPE_V16QI
:
29048 case V8SI_FTYPE_V16QI
:
29049 case V4DI_FTYPE_V16QI
:
29050 case V8SI_FTYPE_V8HI
:
29051 case V4DI_FTYPE_V8HI
:
29052 case V4DI_FTYPE_V4SI
:
29053 case V4DI_FTYPE_V2DI
:
29056 case V4SF_FTYPE_V4SF_VEC_MERGE
:
29057 case V2DF_FTYPE_V2DF_VEC_MERGE
:
29058 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
29059 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
29060 case V16QI_FTYPE_V16QI_V16QI
:
29061 case V16QI_FTYPE_V8HI_V8HI
:
29062 case V8QI_FTYPE_V8QI_V8QI
:
29063 case V8QI_FTYPE_V4HI_V4HI
:
29064 case V8HI_FTYPE_V8HI_V8HI
:
29065 case V8HI_FTYPE_V16QI_V16QI
:
29066 case V8HI_FTYPE_V4SI_V4SI
:
29067 case V8SF_FTYPE_V8SF_V8SF
:
29068 case V8SF_FTYPE_V8SF_V8SI
:
29069 case V4SI_FTYPE_V4SI_V4SI
:
29070 case V4SI_FTYPE_V8HI_V8HI
:
29071 case V4SI_FTYPE_V4SF_V4SF
:
29072 case V4SI_FTYPE_V2DF_V2DF
:
29073 case V4HI_FTYPE_V4HI_V4HI
:
29074 case V4HI_FTYPE_V8QI_V8QI
:
29075 case V4HI_FTYPE_V2SI_V2SI
:
29076 case V4DF_FTYPE_V4DF_V4DF
:
29077 case V4DF_FTYPE_V4DF_V4DI
:
29078 case V4SF_FTYPE_V4SF_V4SF
:
29079 case V4SF_FTYPE_V4SF_V4SI
:
29080 case V4SF_FTYPE_V4SF_V2SI
:
29081 case V4SF_FTYPE_V4SF_V2DF
:
29082 case V4SF_FTYPE_V4SF_DI
:
29083 case V4SF_FTYPE_V4SF_SI
:
29084 case V2DI_FTYPE_V2DI_V2DI
:
29085 case V2DI_FTYPE_V16QI_V16QI
:
29086 case V2DI_FTYPE_V4SI_V4SI
:
29087 case V2DI_FTYPE_V2DI_V16QI
:
29088 case V2DI_FTYPE_V2DF_V2DF
:
29089 case V2SI_FTYPE_V2SI_V2SI
:
29090 case V2SI_FTYPE_V4HI_V4HI
:
29091 case V2SI_FTYPE_V2SF_V2SF
:
29092 case V2DF_FTYPE_V2DF_V2DF
:
29093 case V2DF_FTYPE_V2DF_V4SF
:
29094 case V2DF_FTYPE_V2DF_V2DI
:
29095 case V2DF_FTYPE_V2DF_DI
:
29096 case V2DF_FTYPE_V2DF_SI
:
29097 case V2SF_FTYPE_V2SF_V2SF
:
29098 case V1DI_FTYPE_V1DI_V1DI
:
29099 case V1DI_FTYPE_V8QI_V8QI
:
29100 case V1DI_FTYPE_V2SI_V2SI
:
29101 case V32QI_FTYPE_V16HI_V16HI
:
29102 case V16HI_FTYPE_V8SI_V8SI
:
29103 case V32QI_FTYPE_V32QI_V32QI
:
29104 case V16HI_FTYPE_V32QI_V32QI
:
29105 case V16HI_FTYPE_V16HI_V16HI
:
29106 case V8SI_FTYPE_V4DF_V4DF
:
29107 case V8SI_FTYPE_V8SI_V8SI
:
29108 case V8SI_FTYPE_V16HI_V16HI
:
29109 case V4DI_FTYPE_V4DI_V4DI
:
29110 case V4DI_FTYPE_V8SI_V8SI
:
29111 if (comparison
== UNKNOWN
)
29112 return ix86_expand_binop_builtin (icode
, exp
, target
);
29115 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
29116 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
29117 gcc_assert (comparison
!= UNKNOWN
);
29121 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
29122 case V16HI_FTYPE_V16HI_SI_COUNT
:
29123 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
29124 case V8SI_FTYPE_V8SI_SI_COUNT
:
29125 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
29126 case V4DI_FTYPE_V4DI_INT_COUNT
:
29127 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
29128 case V8HI_FTYPE_V8HI_SI_COUNT
:
29129 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
29130 case V4SI_FTYPE_V4SI_SI_COUNT
:
29131 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
29132 case V4HI_FTYPE_V4HI_SI_COUNT
:
29133 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
29134 case V2DI_FTYPE_V2DI_SI_COUNT
:
29135 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
29136 case V2SI_FTYPE_V2SI_SI_COUNT
:
29137 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
29138 case V1DI_FTYPE_V1DI_SI_COUNT
:
29140 last_arg_count
= true;
29142 case UINT64_FTYPE_UINT64_UINT64
:
29143 case UINT_FTYPE_UINT_UINT
:
29144 case UINT_FTYPE_UINT_USHORT
:
29145 case UINT_FTYPE_UINT_UCHAR
:
29146 case UINT16_FTYPE_UINT16_INT
:
29147 case UINT8_FTYPE_UINT8_INT
:
29150 case V2DI_FTYPE_V2DI_INT_CONVERT
:
29153 nargs_constant
= 1;
29155 case V4DI_FTYPE_V4DI_INT_CONVERT
:
29158 nargs_constant
= 1;
29160 case V8HI_FTYPE_V8HI_INT
:
29161 case V8HI_FTYPE_V8SF_INT
:
29162 case V8HI_FTYPE_V4SF_INT
:
29163 case V8SF_FTYPE_V8SF_INT
:
29164 case V4SI_FTYPE_V4SI_INT
:
29165 case V4SI_FTYPE_V8SI_INT
:
29166 case V4HI_FTYPE_V4HI_INT
:
29167 case V4DF_FTYPE_V4DF_INT
:
29168 case V4SF_FTYPE_V4SF_INT
:
29169 case V4SF_FTYPE_V8SF_INT
:
29170 case V2DI_FTYPE_V2DI_INT
:
29171 case V2DF_FTYPE_V2DF_INT
:
29172 case V2DF_FTYPE_V4DF_INT
:
29173 case V16HI_FTYPE_V16HI_INT
:
29174 case V8SI_FTYPE_V8SI_INT
:
29175 case V4DI_FTYPE_V4DI_INT
:
29176 case V2DI_FTYPE_V4DI_INT
:
29178 nargs_constant
= 1;
29180 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
29181 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
29182 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
29183 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
29184 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
29185 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
29188 case V32QI_FTYPE_V32QI_V32QI_INT
:
29189 case V16HI_FTYPE_V16HI_V16HI_INT
:
29190 case V16QI_FTYPE_V16QI_V16QI_INT
:
29191 case V4DI_FTYPE_V4DI_V4DI_INT
:
29192 case V8HI_FTYPE_V8HI_V8HI_INT
:
29193 case V8SI_FTYPE_V8SI_V8SI_INT
:
29194 case V8SI_FTYPE_V8SI_V4SI_INT
:
29195 case V8SF_FTYPE_V8SF_V8SF_INT
:
29196 case V8SF_FTYPE_V8SF_V4SF_INT
:
29197 case V4SI_FTYPE_V4SI_V4SI_INT
:
29198 case V4DF_FTYPE_V4DF_V4DF_INT
:
29199 case V4DF_FTYPE_V4DF_V2DF_INT
:
29200 case V4SF_FTYPE_V4SF_V4SF_INT
:
29201 case V2DI_FTYPE_V2DI_V2DI_INT
:
29202 case V4DI_FTYPE_V4DI_V2DI_INT
:
29203 case V2DF_FTYPE_V2DF_V2DF_INT
:
29205 nargs_constant
= 1;
29207 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
29210 nargs_constant
= 1;
29212 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
29215 nargs_constant
= 1;
29217 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
29220 nargs_constant
= 1;
29222 case V2DI_FTYPE_V2DI_UINT_UINT
:
29224 nargs_constant
= 2;
29226 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
29227 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
29228 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
29229 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
29231 nargs_constant
= 1;
29233 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
29235 nargs_constant
= 2;
29238 gcc_unreachable ();
29241 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29243 if (comparison
!= UNKNOWN
)
29245 gcc_assert (nargs
== 2);
29246 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
29249 if (rmode
== VOIDmode
|| rmode
== tmode
)
29253 || GET_MODE (target
) != tmode
29254 || !insn_p
->operand
[0].predicate (target
, tmode
))
29255 target
= gen_reg_rtx (tmode
);
29256 real_target
= target
;
29260 target
= gen_reg_rtx (rmode
);
29261 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
29264 for (i
= 0; i
< nargs
; i
++)
29266 tree arg
= CALL_EXPR_ARG (exp
, i
);
29267 rtx op
= expand_normal (arg
);
29268 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29269 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29271 if (last_arg_count
&& (i
+ 1) == nargs
)
29273 /* SIMD shift insns take either an 8-bit immediate or
29274 register as count. But builtin functions take int as
29275 count. If count doesn't match, we put it in register. */
29278 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
29279 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
29280 op
= copy_to_reg (op
);
29283 else if ((nargs
- i
) <= nargs_constant
)
29288 case CODE_FOR_avx2_inserti128
:
29289 case CODE_FOR_avx2_extracti128
:
29290 error ("the last argument must be an 1-bit immediate");
29293 case CODE_FOR_sse4_1_roundsd
:
29294 case CODE_FOR_sse4_1_roundss
:
29296 case CODE_FOR_sse4_1_roundpd
:
29297 case CODE_FOR_sse4_1_roundps
:
29298 case CODE_FOR_avx_roundpd256
:
29299 case CODE_FOR_avx_roundps256
:
29301 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
29302 case CODE_FOR_sse4_1_roundps_sfix
:
29303 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
29304 case CODE_FOR_avx_roundps_sfix256
:
29306 case CODE_FOR_sse4_1_blendps
:
29307 case CODE_FOR_avx_blendpd256
:
29308 case CODE_FOR_avx_vpermilv4df
:
29309 error ("the last argument must be a 4-bit immediate");
29312 case CODE_FOR_sse4_1_blendpd
:
29313 case CODE_FOR_avx_vpermilv2df
:
29314 case CODE_FOR_xop_vpermil2v2df3
:
29315 case CODE_FOR_xop_vpermil2v4sf3
:
29316 case CODE_FOR_xop_vpermil2v4df3
:
29317 case CODE_FOR_xop_vpermil2v8sf3
:
29318 error ("the last argument must be a 2-bit immediate");
29321 case CODE_FOR_avx_vextractf128v4df
:
29322 case CODE_FOR_avx_vextractf128v8sf
:
29323 case CODE_FOR_avx_vextractf128v8si
:
29324 case CODE_FOR_avx_vinsertf128v4df
:
29325 case CODE_FOR_avx_vinsertf128v8sf
:
29326 case CODE_FOR_avx_vinsertf128v8si
:
29327 error ("the last argument must be a 1-bit immediate");
29330 case CODE_FOR_avx_vmcmpv2df3
:
29331 case CODE_FOR_avx_vmcmpv4sf3
:
29332 case CODE_FOR_avx_cmpv2df3
:
29333 case CODE_FOR_avx_cmpv4sf3
:
29334 case CODE_FOR_avx_cmpv4df3
:
29335 case CODE_FOR_avx_cmpv8sf3
:
29336 error ("the last argument must be a 5-bit immediate");
29340 switch (nargs_constant
)
29343 if ((nargs
- i
) == nargs_constant
)
29345 error ("the next to last argument must be an 8-bit immediate");
29349 error ("the last argument must be an 8-bit immediate");
29352 gcc_unreachable ();
29359 if (VECTOR_MODE_P (mode
))
29360 op
= safe_vector_operand (op
, mode
);
29362 /* If we aren't optimizing, only allow one memory operand to
29364 if (memory_operand (op
, mode
))
29367 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
29369 if (optimize
|| !match
|| num_memory
> 1)
29370 op
= copy_to_mode_reg (mode
, op
);
29374 op
= copy_to_reg (op
);
29375 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
29380 args
[i
].mode
= mode
;
29386 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
29389 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
29392 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29396 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29397 args
[2].op
, args
[3].op
);
29400 gcc_unreachable ();
29410 /* Subroutine of ix86_expand_builtin to take care of special insns
29411 with variable number of operands. */
29414 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
29415 tree exp
, rtx target
)
29419 unsigned int i
, nargs
, arg_adjust
, memory
;
29423 enum machine_mode mode
;
29425 enum insn_code icode
= d
->icode
;
29426 bool last_arg_constant
= false;
29427 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29428 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29429 enum { load
, store
} klass
;
29431 switch ((enum ix86_builtin_func_type
) d
->flag
)
29433 case VOID_FTYPE_VOID
:
29434 if (icode
== CODE_FOR_avx_vzeroupper
)
29435 target
= GEN_INT (vzeroupper_intrinsic
);
29436 emit_insn (GEN_FCN (icode
) (target
));
29438 case VOID_FTYPE_UINT64
:
29439 case VOID_FTYPE_UNSIGNED
:
29445 case INT_FTYPE_VOID
:
29446 case UINT64_FTYPE_VOID
:
29447 case UNSIGNED_FTYPE_VOID
:
29452 case UINT64_FTYPE_PUNSIGNED
:
29453 case V2DI_FTYPE_PV2DI
:
29454 case V4DI_FTYPE_PV4DI
:
29455 case V32QI_FTYPE_PCCHAR
:
29456 case V16QI_FTYPE_PCCHAR
:
29457 case V8SF_FTYPE_PCV4SF
:
29458 case V8SF_FTYPE_PCFLOAT
:
29459 case V4SF_FTYPE_PCFLOAT
:
29460 case V4DF_FTYPE_PCV2DF
:
29461 case V4DF_FTYPE_PCDOUBLE
:
29462 case V2DF_FTYPE_PCDOUBLE
:
29463 case VOID_FTYPE_PVOID
:
29468 case VOID_FTYPE_PV2SF_V4SF
:
29469 case VOID_FTYPE_PV4DI_V4DI
:
29470 case VOID_FTYPE_PV2DI_V2DI
:
29471 case VOID_FTYPE_PCHAR_V32QI
:
29472 case VOID_FTYPE_PCHAR_V16QI
:
29473 case VOID_FTYPE_PFLOAT_V8SF
:
29474 case VOID_FTYPE_PFLOAT_V4SF
:
29475 case VOID_FTYPE_PDOUBLE_V4DF
:
29476 case VOID_FTYPE_PDOUBLE_V2DF
:
29477 case VOID_FTYPE_PLONGLONG_LONGLONG
:
29478 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
29479 case VOID_FTYPE_PINT_INT
:
29482 /* Reserve memory operand for target. */
29483 memory
= ARRAY_SIZE (args
);
29485 case V4SF_FTYPE_V4SF_PCV2SF
:
29486 case V2DF_FTYPE_V2DF_PCDOUBLE
:
29491 case V8SF_FTYPE_PCV8SF_V8SI
:
29492 case V4DF_FTYPE_PCV4DF_V4DI
:
29493 case V4SF_FTYPE_PCV4SF_V4SI
:
29494 case V2DF_FTYPE_PCV2DF_V2DI
:
29495 case V8SI_FTYPE_PCV8SI_V8SI
:
29496 case V4DI_FTYPE_PCV4DI_V4DI
:
29497 case V4SI_FTYPE_PCV4SI_V4SI
:
29498 case V2DI_FTYPE_PCV2DI_V2DI
:
29503 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
29504 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
29505 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
29506 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
29507 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
29508 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
29509 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
29510 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
29513 /* Reserve memory operand for target. */
29514 memory
= ARRAY_SIZE (args
);
29516 case VOID_FTYPE_UINT_UINT_UINT
:
29517 case VOID_FTYPE_UINT64_UINT_UINT
:
29518 case UCHAR_FTYPE_UINT_UINT_UINT
:
29519 case UCHAR_FTYPE_UINT64_UINT_UINT
:
29522 memory
= ARRAY_SIZE (args
);
29523 last_arg_constant
= true;
29526 gcc_unreachable ();
29529 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29531 if (klass
== store
)
29533 arg
= CALL_EXPR_ARG (exp
, 0);
29534 op
= expand_normal (arg
);
29535 gcc_assert (target
== 0);
29538 if (GET_MODE (op
) != Pmode
)
29539 op
= convert_to_mode (Pmode
, op
, 1);
29540 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
29543 target
= force_reg (tmode
, op
);
29551 || !register_operand (target
, tmode
)
29552 || GET_MODE (target
) != tmode
)
29553 target
= gen_reg_rtx (tmode
);
29556 for (i
= 0; i
< nargs
; i
++)
29558 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29561 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
29562 op
= expand_normal (arg
);
29563 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29565 if (last_arg_constant
&& (i
+ 1) == nargs
)
29569 if (icode
== CODE_FOR_lwp_lwpvalsi3
29570 || icode
== CODE_FOR_lwp_lwpinssi3
29571 || icode
== CODE_FOR_lwp_lwpvaldi3
29572 || icode
== CODE_FOR_lwp_lwpinsdi3
)
29573 error ("the last argument must be a 32-bit immediate");
29575 error ("the last argument must be an 8-bit immediate");
29583 /* This must be the memory operand. */
29584 if (GET_MODE (op
) != Pmode
)
29585 op
= convert_to_mode (Pmode
, op
, 1);
29586 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
29587 gcc_assert (GET_MODE (op
) == mode
29588 || GET_MODE (op
) == VOIDmode
);
29592 /* This must be register. */
29593 if (VECTOR_MODE_P (mode
))
29594 op
= safe_vector_operand (op
, mode
);
29596 gcc_assert (GET_MODE (op
) == mode
29597 || GET_MODE (op
) == VOIDmode
);
29598 op
= copy_to_mode_reg (mode
, op
);
29603 args
[i
].mode
= mode
;
29609 pat
= GEN_FCN (icode
) (target
);
29612 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
29615 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
29618 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
29621 gcc_unreachable ();
29627 return klass
== store
? 0 : target
;
29630 /* Return the integer constant in ARG. Constrain it to be in the range
29631 of the subparts of VEC_TYPE; issue an error if not. */
29634 get_element_number (tree vec_type
, tree arg
)
29636 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
29638 if (!host_integerp (arg
, 1)
29639 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
29641 error ("selector must be an integer constant in the range 0..%wi", max
);
29648 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29649 ix86_expand_vector_init. We DO have language-level syntax for this, in
29650 the form of (type){ init-list }. Except that since we can't place emms
29651 instructions from inside the compiler, we can't allow the use of MMX
29652 registers unless the user explicitly asks for it. So we do *not* define
29653 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29654 we have builtins invoked by mmintrin.h that gives us license to emit
29655 these sorts of instructions. */
29658 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
29660 enum machine_mode tmode
= TYPE_MODE (type
);
29661 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
29662 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
29663 rtvec v
= rtvec_alloc (n_elt
);
29665 gcc_assert (VECTOR_MODE_P (tmode
));
29666 gcc_assert (call_expr_nargs (exp
) == n_elt
);
29668 for (i
= 0; i
< n_elt
; ++i
)
29670 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
29671 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
29674 if (!target
|| !register_operand (target
, tmode
))
29675 target
= gen_reg_rtx (tmode
);
29677 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
29681 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29682 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29683 had a language-level syntax for referencing vector elements. */
29686 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
29688 enum machine_mode tmode
, mode0
;
29693 arg0
= CALL_EXPR_ARG (exp
, 0);
29694 arg1
= CALL_EXPR_ARG (exp
, 1);
29696 op0
= expand_normal (arg0
);
29697 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
29699 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
29700 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
29701 gcc_assert (VECTOR_MODE_P (mode0
));
29703 op0
= force_reg (mode0
, op0
);
29705 if (optimize
|| !target
|| !register_operand (target
, tmode
))
29706 target
= gen_reg_rtx (tmode
);
29708 ix86_expand_vector_extract (true, target
, op0
, elt
);
29713 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29714 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29715 a language-level syntax for referencing vector elements. */
29718 ix86_expand_vec_set_builtin (tree exp
)
29720 enum machine_mode tmode
, mode1
;
29721 tree arg0
, arg1
, arg2
;
29723 rtx op0
, op1
, target
;
29725 arg0
= CALL_EXPR_ARG (exp
, 0);
29726 arg1
= CALL_EXPR_ARG (exp
, 1);
29727 arg2
= CALL_EXPR_ARG (exp
, 2);
29729 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
29730 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
29731 gcc_assert (VECTOR_MODE_P (tmode
));
29733 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
29734 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
29735 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
29737 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
29738 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
29740 op0
= force_reg (tmode
, op0
);
29741 op1
= force_reg (mode1
, op1
);
29743 /* OP0 is the source of these builtin functions and shouldn't be
29744 modified. Create a copy, use it and return it as target. */
29745 target
= gen_reg_rtx (tmode
);
29746 emit_move_insn (target
, op0
);
29747 ix86_expand_vector_set (true, target
, op1
, elt
);
29752 /* Expand an expression EXP that calls a built-in function,
29753 with result going to TARGET if that's convenient
29754 (and in mode MODE if that's convenient).
29755 SUBTARGET may be used as the target for computing one of EXP's operands.
29756 IGNORE is nonzero if the value is to be ignored. */
29759 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
29760 enum machine_mode mode ATTRIBUTE_UNUSED
,
29761 int ignore ATTRIBUTE_UNUSED
)
29763 const struct builtin_description
*d
;
29765 enum insn_code icode
;
29766 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
29767 tree arg0
, arg1
, arg2
, arg3
, arg4
;
29768 rtx op0
, op1
, op2
, op3
, op4
, pat
;
29769 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
29770 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
29772 /* For CPU builtins that can be folded, fold first and expand the fold. */
29775 case IX86_BUILTIN_CPU_INIT
:
29777 /* Make it call __cpu_indicator_init in libgcc. */
29778 tree call_expr
, fndecl
, type
;
29779 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
29780 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
29781 call_expr
= build_call_expr (fndecl
, 0);
29782 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
29784 case IX86_BUILTIN_CPU_IS
:
29785 case IX86_BUILTIN_CPU_SUPPORTS
:
29787 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29788 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
29789 gcc_assert (fold_expr
!= NULL_TREE
);
29790 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
29794 /* Determine whether the builtin function is available under the current ISA.
29795 Originally the builtin was not created if it wasn't applicable to the
29796 current ISA based on the command line switches. With function specific
29797 options, we need to check in the context of the function making the call
29798 whether it is supported. */
29799 if (ix86_builtins_isa
[fcode
].isa
29800 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
29802 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
29803 NULL
, (enum fpmath_unit
) 0, false);
29806 error ("%qE needs unknown isa option", fndecl
);
29809 gcc_assert (opts
!= NULL
);
29810 error ("%qE needs isa option %s", fndecl
, opts
);
29818 case IX86_BUILTIN_MASKMOVQ
:
29819 case IX86_BUILTIN_MASKMOVDQU
:
29820 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
29821 ? CODE_FOR_mmx_maskmovq
29822 : CODE_FOR_sse2_maskmovdqu
);
29823 /* Note the arg order is different from the operand order. */
29824 arg1
= CALL_EXPR_ARG (exp
, 0);
29825 arg2
= CALL_EXPR_ARG (exp
, 1);
29826 arg0
= CALL_EXPR_ARG (exp
, 2);
29827 op0
= expand_normal (arg0
);
29828 op1
= expand_normal (arg1
);
29829 op2
= expand_normal (arg2
);
29830 mode0
= insn_data
[icode
].operand
[0].mode
;
29831 mode1
= insn_data
[icode
].operand
[1].mode
;
29832 mode2
= insn_data
[icode
].operand
[2].mode
;
29834 if (GET_MODE (op0
) != Pmode
)
29835 op0
= convert_to_mode (Pmode
, op0
, 1);
29836 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
29838 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
29839 op0
= copy_to_mode_reg (mode0
, op0
);
29840 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
29841 op1
= copy_to_mode_reg (mode1
, op1
);
29842 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
29843 op2
= copy_to_mode_reg (mode2
, op2
);
29844 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
29850 case IX86_BUILTIN_LDMXCSR
:
29851 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
29852 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
29853 emit_move_insn (target
, op0
);
29854 emit_insn (gen_sse_ldmxcsr (target
));
29857 case IX86_BUILTIN_STMXCSR
:
29858 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
29859 emit_insn (gen_sse_stmxcsr (target
));
29860 return copy_to_mode_reg (SImode
, target
);
29862 case IX86_BUILTIN_CLFLUSH
:
29863 arg0
= CALL_EXPR_ARG (exp
, 0);
29864 op0
= expand_normal (arg0
);
29865 icode
= CODE_FOR_sse2_clflush
;
29866 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
29868 if (GET_MODE (op0
) != Pmode
)
29869 op0
= convert_to_mode (Pmode
, op0
, 1);
29870 op0
= force_reg (Pmode
, op0
);
29873 emit_insn (gen_sse2_clflush (op0
));
29876 case IX86_BUILTIN_MONITOR
:
29877 arg0
= CALL_EXPR_ARG (exp
, 0);
29878 arg1
= CALL_EXPR_ARG (exp
, 1);
29879 arg2
= CALL_EXPR_ARG (exp
, 2);
29880 op0
= expand_normal (arg0
);
29881 op1
= expand_normal (arg1
);
29882 op2
= expand_normal (arg2
);
29885 if (GET_MODE (op0
) != Pmode
)
29886 op0
= convert_to_mode (Pmode
, op0
, 1);
29887 op0
= force_reg (Pmode
, op0
);
29890 op1
= copy_to_mode_reg (SImode
, op1
);
29892 op2
= copy_to_mode_reg (SImode
, op2
);
29893 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
29896 case IX86_BUILTIN_MWAIT
:
29897 arg0
= CALL_EXPR_ARG (exp
, 0);
29898 arg1
= CALL_EXPR_ARG (exp
, 1);
29899 op0
= expand_normal (arg0
);
29900 op1
= expand_normal (arg1
);
29902 op0
= copy_to_mode_reg (SImode
, op0
);
29904 op1
= copy_to_mode_reg (SImode
, op1
);
29905 emit_insn (gen_sse3_mwait (op0
, op1
));
29908 case IX86_BUILTIN_VEC_INIT_V2SI
:
29909 case IX86_BUILTIN_VEC_INIT_V4HI
:
29910 case IX86_BUILTIN_VEC_INIT_V8QI
:
29911 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
29913 case IX86_BUILTIN_VEC_EXT_V2DF
:
29914 case IX86_BUILTIN_VEC_EXT_V2DI
:
29915 case IX86_BUILTIN_VEC_EXT_V4SF
:
29916 case IX86_BUILTIN_VEC_EXT_V4SI
:
29917 case IX86_BUILTIN_VEC_EXT_V8HI
:
29918 case IX86_BUILTIN_VEC_EXT_V2SI
:
29919 case IX86_BUILTIN_VEC_EXT_V4HI
:
29920 case IX86_BUILTIN_VEC_EXT_V16QI
:
29921 return ix86_expand_vec_ext_builtin (exp
, target
);
29923 case IX86_BUILTIN_VEC_SET_V2DI
:
29924 case IX86_BUILTIN_VEC_SET_V4SF
:
29925 case IX86_BUILTIN_VEC_SET_V4SI
:
29926 case IX86_BUILTIN_VEC_SET_V8HI
:
29927 case IX86_BUILTIN_VEC_SET_V4HI
:
29928 case IX86_BUILTIN_VEC_SET_V16QI
:
29929 return ix86_expand_vec_set_builtin (exp
);
29931 case IX86_BUILTIN_INFQ
:
29932 case IX86_BUILTIN_HUGE_VALQ
:
29934 REAL_VALUE_TYPE inf
;
29938 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
29940 tmp
= validize_mem (force_const_mem (mode
, tmp
));
29943 target
= gen_reg_rtx (mode
);
29945 emit_move_insn (target
, tmp
);
29949 case IX86_BUILTIN_LLWPCB
:
29950 arg0
= CALL_EXPR_ARG (exp
, 0);
29951 op0
= expand_normal (arg0
);
29952 icode
= CODE_FOR_lwp_llwpcb
;
29953 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
29955 if (GET_MODE (op0
) != Pmode
)
29956 op0
= convert_to_mode (Pmode
, op0
, 1);
29957 op0
= force_reg (Pmode
, op0
);
29959 emit_insn (gen_lwp_llwpcb (op0
));
29962 case IX86_BUILTIN_SLWPCB
:
29963 icode
= CODE_FOR_lwp_slwpcb
;
29965 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
29966 target
= gen_reg_rtx (Pmode
);
29967 emit_insn (gen_lwp_slwpcb (target
));
29970 case IX86_BUILTIN_BEXTRI32
:
29971 case IX86_BUILTIN_BEXTRI64
:
29972 arg0
= CALL_EXPR_ARG (exp
, 0);
29973 arg1
= CALL_EXPR_ARG (exp
, 1);
29974 op0
= expand_normal (arg0
);
29975 op1
= expand_normal (arg1
);
29976 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
29977 ? CODE_FOR_tbm_bextri_si
29978 : CODE_FOR_tbm_bextri_di
);
29979 if (!CONST_INT_P (op1
))
29981 error ("last argument must be an immediate");
29986 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
29987 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
29988 op1
= GEN_INT (length
);
29989 op2
= GEN_INT (lsb_index
);
29990 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
29996 case IX86_BUILTIN_RDRAND16_STEP
:
29997 icode
= CODE_FOR_rdrandhi_1
;
30001 case IX86_BUILTIN_RDRAND32_STEP
:
30002 icode
= CODE_FOR_rdrandsi_1
;
30006 case IX86_BUILTIN_RDRAND64_STEP
:
30007 icode
= CODE_FOR_rdranddi_1
;
30011 op0
= gen_reg_rtx (mode0
);
30012 emit_insn (GEN_FCN (icode
) (op0
));
30014 arg0
= CALL_EXPR_ARG (exp
, 0);
30015 op1
= expand_normal (arg0
);
30016 if (!address_operand (op1
, VOIDmode
))
30018 op1
= convert_memory_address (Pmode
, op1
);
30019 op1
= copy_addr_to_reg (op1
);
30021 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
30023 op1
= gen_reg_rtx (SImode
);
30024 emit_move_insn (op1
, CONST1_RTX (SImode
));
30026 /* Emit SImode conditional move. */
30027 if (mode0
== HImode
)
30029 op2
= gen_reg_rtx (SImode
);
30030 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
30032 else if (mode0
== SImode
)
30035 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
30038 target
= gen_reg_rtx (SImode
);
30040 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
30042 emit_insn (gen_rtx_SET (VOIDmode
, target
,
30043 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
30046 case IX86_BUILTIN_GATHERSIV2DF
:
30047 icode
= CODE_FOR_avx2_gathersiv2df
;
30049 case IX86_BUILTIN_GATHERSIV4DF
:
30050 icode
= CODE_FOR_avx2_gathersiv4df
;
30052 case IX86_BUILTIN_GATHERDIV2DF
:
30053 icode
= CODE_FOR_avx2_gatherdiv2df
;
30055 case IX86_BUILTIN_GATHERDIV4DF
:
30056 icode
= CODE_FOR_avx2_gatherdiv4df
;
30058 case IX86_BUILTIN_GATHERSIV4SF
:
30059 icode
= CODE_FOR_avx2_gathersiv4sf
;
30061 case IX86_BUILTIN_GATHERSIV8SF
:
30062 icode
= CODE_FOR_avx2_gathersiv8sf
;
30064 case IX86_BUILTIN_GATHERDIV4SF
:
30065 icode
= CODE_FOR_avx2_gatherdiv4sf
;
30067 case IX86_BUILTIN_GATHERDIV8SF
:
30068 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30070 case IX86_BUILTIN_GATHERSIV2DI
:
30071 icode
= CODE_FOR_avx2_gathersiv2di
;
30073 case IX86_BUILTIN_GATHERSIV4DI
:
30074 icode
= CODE_FOR_avx2_gathersiv4di
;
30076 case IX86_BUILTIN_GATHERDIV2DI
:
30077 icode
= CODE_FOR_avx2_gatherdiv2di
;
30079 case IX86_BUILTIN_GATHERDIV4DI
:
30080 icode
= CODE_FOR_avx2_gatherdiv4di
;
30082 case IX86_BUILTIN_GATHERSIV4SI
:
30083 icode
= CODE_FOR_avx2_gathersiv4si
;
30085 case IX86_BUILTIN_GATHERSIV8SI
:
30086 icode
= CODE_FOR_avx2_gathersiv8si
;
30088 case IX86_BUILTIN_GATHERDIV4SI
:
30089 icode
= CODE_FOR_avx2_gatherdiv4si
;
30091 case IX86_BUILTIN_GATHERDIV8SI
:
30092 icode
= CODE_FOR_avx2_gatherdiv8si
;
30094 case IX86_BUILTIN_GATHERALTSIV4DF
:
30095 icode
= CODE_FOR_avx2_gathersiv4df
;
30097 case IX86_BUILTIN_GATHERALTDIV8SF
:
30098 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30100 case IX86_BUILTIN_GATHERALTSIV4DI
:
30101 icode
= CODE_FOR_avx2_gathersiv4di
;
30103 case IX86_BUILTIN_GATHERALTDIV8SI
:
30104 icode
= CODE_FOR_avx2_gatherdiv8si
;
30108 arg0
= CALL_EXPR_ARG (exp
, 0);
30109 arg1
= CALL_EXPR_ARG (exp
, 1);
30110 arg2
= CALL_EXPR_ARG (exp
, 2);
30111 arg3
= CALL_EXPR_ARG (exp
, 3);
30112 arg4
= CALL_EXPR_ARG (exp
, 4);
30113 op0
= expand_normal (arg0
);
30114 op1
= expand_normal (arg1
);
30115 op2
= expand_normal (arg2
);
30116 op3
= expand_normal (arg3
);
30117 op4
= expand_normal (arg4
);
30118 /* Note the arg order is different from the operand order. */
30119 mode0
= insn_data
[icode
].operand
[1].mode
;
30120 mode2
= insn_data
[icode
].operand
[3].mode
;
30121 mode3
= insn_data
[icode
].operand
[4].mode
;
30122 mode4
= insn_data
[icode
].operand
[5].mode
;
30124 if (target
== NULL_RTX
30125 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
30126 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
30128 subtarget
= target
;
30130 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
30131 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
30133 rtx half
= gen_reg_rtx (V4SImode
);
30134 if (!nonimmediate_operand (op2
, V8SImode
))
30135 op2
= copy_to_mode_reg (V8SImode
, op2
);
30136 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
30139 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
30140 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
30142 rtx (*gen
) (rtx
, rtx
);
30143 rtx half
= gen_reg_rtx (mode0
);
30144 if (mode0
== V4SFmode
)
30145 gen
= gen_vec_extract_lo_v8sf
;
30147 gen
= gen_vec_extract_lo_v8si
;
30148 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
30149 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
30150 emit_insn (gen (half
, op0
));
30152 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
30153 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
30154 emit_insn (gen (half
, op3
));
30158 /* Force memory operand only with base register here. But we
30159 don't want to do it on memory operand for other builtin
30161 if (GET_MODE (op1
) != Pmode
)
30162 op1
= convert_to_mode (Pmode
, op1
, 1);
30163 op1
= force_reg (Pmode
, op1
);
30165 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30166 op0
= copy_to_mode_reg (mode0
, op0
);
30167 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
30168 op1
= copy_to_mode_reg (Pmode
, op1
);
30169 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
30170 op2
= copy_to_mode_reg (mode2
, op2
);
30171 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
30172 op3
= copy_to_mode_reg (mode3
, op3
);
30173 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
30175 error ("last argument must be scale 1, 2, 4, 8");
30179 /* Optimize. If mask is known to have all high bits set,
30180 replace op0 with pc_rtx to signal that the instruction
30181 overwrites the whole destination and doesn't use its
30182 previous contents. */
30185 if (TREE_CODE (arg3
) == VECTOR_CST
)
30187 unsigned int negative
= 0;
30188 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
30190 tree cst
= VECTOR_CST_ELT (arg3
, i
);
30191 if (TREE_CODE (cst
) == INTEGER_CST
30192 && tree_int_cst_sign_bit (cst
))
30194 else if (TREE_CODE (cst
) == REAL_CST
30195 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
30198 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
30201 else if (TREE_CODE (arg3
) == SSA_NAME
)
30203 /* Recognize also when mask is like:
30204 __v2df src = _mm_setzero_pd ();
30205 __v2df mask = _mm_cmpeq_pd (src, src);
30207 __v8sf src = _mm256_setzero_ps ();
30208 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
30209 as that is a cheaper way to load all ones into
30210 a register than having to load a constant from
30212 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
30213 if (is_gimple_call (def_stmt
))
30215 tree fndecl
= gimple_call_fndecl (def_stmt
);
30217 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30218 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
30220 case IX86_BUILTIN_CMPPD
:
30221 case IX86_BUILTIN_CMPPS
:
30222 case IX86_BUILTIN_CMPPD256
:
30223 case IX86_BUILTIN_CMPPS256
:
30224 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
30227 case IX86_BUILTIN_CMPEQPD
:
30228 case IX86_BUILTIN_CMPEQPS
:
30229 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
30230 && initializer_zerop (gimple_call_arg (def_stmt
,
30241 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
30246 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
30247 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
30249 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
30250 ? V4SFmode
: V4SImode
;
30251 if (target
== NULL_RTX
)
30252 target
= gen_reg_rtx (tmode
);
30253 if (tmode
== V4SFmode
)
30254 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
30256 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
30259 target
= subtarget
;
30263 case IX86_BUILTIN_XABORT
:
30264 icode
= CODE_FOR_xabort
;
30265 arg0
= CALL_EXPR_ARG (exp
, 0);
30266 op0
= expand_normal (arg0
);
30267 mode0
= insn_data
[icode
].operand
[0].mode
;
30268 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
30270 error ("the xabort's argument must be an 8-bit immediate");
30273 emit_insn (gen_xabort (op0
));
30280 for (i
= 0, d
= bdesc_special_args
;
30281 i
< ARRAY_SIZE (bdesc_special_args
);
30283 if (d
->code
== fcode
)
30284 return ix86_expand_special_args_builtin (d
, exp
, target
);
30286 for (i
= 0, d
= bdesc_args
;
30287 i
< ARRAY_SIZE (bdesc_args
);
30289 if (d
->code
== fcode
)
30292 case IX86_BUILTIN_FABSQ
:
30293 case IX86_BUILTIN_COPYSIGNQ
:
30295 /* Emit a normal call if SSE isn't available. */
30296 return expand_call (exp
, target
, ignore
);
30298 return ix86_expand_args_builtin (d
, exp
, target
);
30301 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
30302 if (d
->code
== fcode
)
30303 return ix86_expand_sse_comi (d
, exp
, target
);
30305 for (i
= 0, d
= bdesc_pcmpestr
;
30306 i
< ARRAY_SIZE (bdesc_pcmpestr
);
30308 if (d
->code
== fcode
)
30309 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
30311 for (i
= 0, d
= bdesc_pcmpistr
;
30312 i
< ARRAY_SIZE (bdesc_pcmpistr
);
30314 if (d
->code
== fcode
)
30315 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
30317 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
30318 if (d
->code
== fcode
)
30319 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
30320 (enum ix86_builtin_func_type
)
30321 d
->flag
, d
->comparison
);
30323 gcc_unreachable ();
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.

   NOTE(review): this chunk was extracted with interior lines missing; the
   formatting and the missing lines (switch head, braces, returns, the
   BUILT_IN_FMA case label) are reconstructed from upstream GCC 4.8 — verify
   against the original file.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  /* Only vectorize normal builtins, and only when both the input and
     output types are vectors.  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
	}
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
	}
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.

   NOTE(review): reconstructed from a mangled extraction with missing
   interior lines (locals, some case labels, the uppercase conversion,
   arity loop body, final return) filled in from upstream GCC 4.8 —
   verify against the original file.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Element modes and lane counts of input and output vectors must match.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      /* Double variants: SVML provides only the 2-lane DFmode form.  */
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      /* Float variants: SVML provides only the 4-lane SFmode form.  */
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  /* Build the SVML entry-point name.  "log" is irregular in SVML
     ("Ln"), so it is special-cased; other names are derived from the
     builtin's "__builtin_" name (bname + 10 skips that prefix).  */
  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.

   NOTE(review): reconstructed from a mangled extraction; missing interior
   lines (TARGET_64BIT guard, name[4]/name[5] fix-ups, arity loop, final
   return) filled in from upstream GCC 4.8 — verify against the original.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  /* Template "__vr.._": positions 4 and 5 get the element kind ('d'/'s')
     and lane count ('2'/'4'); the builtin name is appended at offset 7.  */
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* bname + 10 skips the "__builtin_" prefix.  */
  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.

   NOTE(review): reconstructed from a mangled extraction; the missing
   TARGET_AVX2 guard, scale bounds check and mode case labels are filled
   in from upstream GCC 4.8 — verify against the original file.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  /* Gather insns exist only with AVX2.  */
  if (! TARGET_AVX2)
    return NULL_TREE;

  /* Index must be a SImode or DImode integer (or pointer).  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale must be 1, 2, 4 or 8 (a power of two no larger than 8).  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.

   NOTE(review): reconstructed; the md_fn branch structure and default
   cases are filled in from upstream GCC 4.8 — verify.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  /* Reciprocal approximations are only safe with fast, finite,
     non-trapping SSE math, and only worth it when optimizing for speed.  */
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.

   NOTE(review): reconstructed from a mangled extraction; the element
   validation body and the mode case labels are filled in from upstream
   GCC 4.8 — verify against the original file.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low-lane selectors are encoded; fall through to
	 encode them as the 128-bit case.  */
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.

   NOTE(review): reconstructed from a mangled extraction; missing lines
   (validation body, half-alignment checks inside the mask loop) filled
   in from upstream GCC 4.8 — verify against the original file.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      /* Each half must start on a half boundary of the source.  */
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.

   NOTE(review): reconstructed from a mangled extraction; switch heads,
   case labels and the local declarations are filled in from upstream
   GCC 4.8 — verify against the original file.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      /* With a red zone we can store below the stack pointer without
	 adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  /* Push the value with a pre-decrement of the stack pointer.  */
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];

	    /* Split the 64-bit value into two 32-bit pushes.  */
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.

   Undo the stack adjustment made by ix86_force_to_memory.  With a red
   zone nothing was pushed, so nothing needs to be released.

   NOTE(review): reconstructed; the size computation is filled in from
   upstream GCC 4.8 — verify against the original file.  */

void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   NOTE(review): reconstructed from a mangled extraction; missing lines
   (CONSTANT_P guard, TARGET_80387 check, several returns) are filled in
   from upstream GCC 4.8 — verify against the original file.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.

   Implements TARGET_PREFERRED_OUTPUT_RELOAD_CLASS.

   NOTE(review): reconstructed formatting; the final returns are filled
   in from upstream GCC 4.8 — verify against the original file.  */

static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
/* Implement TARGET_SECONDARY_RELOAD.

   NOTE(review): reconstructed from a mangled extraction; the
   TARGET_64BIT/MEM_P guard, the regno computation and the final return
   are filled in from upstream GCC 4.8 — verify against the original.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
	  || rclass == LEGACY_REGS
	  || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   Return true for the small / single-register classes that the register
   allocator is likely to run out of.

   NOTE(review): almost all of this function is missing from the
   extraction (only two case labels are visible); the full case list and
   the true/false returns are reconstructed from upstream GCC 4.8 —
   verify against the original file.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.

   NOTE(review): reconstructed formatting; the TARGET_SSE2 guard and the
   returns are filled in from upstream GCC 4.8 — verify.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  /* A "maybe" class mixes units; that must never reach here in strict
     mode (see comment above).  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}
/* Out-of-line wrapper around inline_secondary_memory_needed, used as
   the SECONDARY_MEMORY_NEEDED target macro implementation.  */

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.

   NOTE(review): reconstructed formatting; the non-integer branch returns
   are filled in from upstream GCC 4.8 — verify.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      /* XFmode/XCmode are stored in integer regs as 80-bit values padded
	 to 2/3 (and 4/6 for the complex form) words depending on ABI.  */
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      /* FP/SSE/MMX registers hold a whole value each; complex modes
	 need a pair.  */
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.

   NOTE(review): reconstructed formatting; the FROM==TO early exit and
   the returns are filled in from upstream GCC 4.8 — verify.  */

static bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.

   NOTE(review): reconstructed from a mangled extraction; the index
   switches for the FP/SSE/MMX branches and several case heads are
   filled in from upstream GCC 4.8 — verify against the original.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	  case SFmode:
	    index = 0;
	    break;
	  case DFmode:
	    index = 1;
	    break;
	  case XFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  case 16:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	/* QImode in a byte-addressable register is cheap; elsewhere it
	   needs a movzbl load and a 4-cycle-penalized store.  */
	if (Q_CLASS_P (regclass) || TARGET_64BIT)
	  {
	    if (!in)
	      return ix86_cost->int_store[0];
	    if (TARGET_PARTIAL_REG_DEPENDENCY
		&& optimize_function_for_speed_p (cfun))
	      cost = ix86_cost->movzbl_load;
	    else
	      cost = ix86_cost->int_load[0];
	    if (in == 2)
	      return MAX (cost, ix86_cost->int_store[0]);
	    return cost;
	  }
	else
	  {
	   if (in == 2)
	     return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	   if (in)
	     return ix86_cost->movzbl_load;
	   else
	     return ix86_cost->int_store[0] + 4;
	  }
	break;
      case 2:
	if (in == 2)
	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
	if (mode == TFmode)
	  mode = XFmode;
	if (in == 2)
	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
	else if (in)
	  cost = ix86_cost->int_load[2];
	else
	  cost = ix86_cost->int_store[2];
	return (cost * (((int) GET_MODE_SIZE (mode)
			+ UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
/* Implement TARGET_MEMORY_MOVE_COST: thin wrapper that normalizes the
   bool IN flag to the 0/1 convention of inline_memory_move_cost.  */

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
31689 /* Return the cost of moving data from a register in class CLASS1 to
31690 one in class CLASS2.
31692 It is not required that the cost always equal 2 when FROM is the same as TO;
31693 on some machines it is expensive to move between registers if they are not
31694 general registers. */
31697 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
31698 reg_class_t class2_i
)
31700 enum reg_class class1
= (enum reg_class
) class1_i
;
31701 enum reg_class class2
= (enum reg_class
) class2_i
;
31703 /* In case we require secondary memory, compute cost of the store followed
31704 by load. In order to avoid bad register allocation choices, we need
31705 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31707 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
31711 cost
+= inline_memory_move_cost (mode
, class1
, 2);
31712 cost
+= inline_memory_move_cost (mode
, class2
, 2);
31714 /* In case of copying from general_purpose_register we may emit multiple
31715 stores followed by single load causing memory size mismatch stall.
31716 Count this as arbitrarily high cost of 20. */
31717 if (targetm
.class_max_nregs (class1
, mode
)
31718 > targetm
.class_max_nregs (class2
, mode
))
31721 /* In the case of FP/MMX moves, the registers actually overlap, and we
31722 have to switch modes in order to treat them differently. */
31723 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
31724 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
31730 /* Moves between SSE/MMX and integer unit are expensive. */
31731 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
31732 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
31734 /* ??? By keeping returned value relatively high, we limit the number
31735 of moves between integer and MMX/SSE registers for all targets.
31736 Additionally, high value prevents problem with x86_modes_tieable_p(),
31737 where integer modes in MMX/SSE registers are not tieable
31738 because of missing QImode and HImode moves to, from or between
31739 MMX/SSE registers. */
31740 return MAX (8, ix86_cost
->mmxsse_to_integer
);
31742 if (MAYBE_FLOAT_CLASS_P (class1
))
31743 return ix86_cost
->fp_move
;
31744 if (MAYBE_SSE_CLASS_P (class1
))
31745 return ix86_cost
->sse_move
;
31746 if (MAYBE_MMX_CLASS_P (class1
))
31747 return ix86_cost
->mmx_move
;
31751 /* Return TRUE if hard register REGNO can hold a value of machine-mode
31755 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
31757 /* Flags and only flags can only hold CCmode values. */
31758 if (CC_REGNO_P (regno
))
31759 return GET_MODE_CLASS (mode
) == MODE_CC
;
31760 if (GET_MODE_CLASS (mode
) == MODE_CC
31761 || GET_MODE_CLASS (mode
) == MODE_RANDOM
31762 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
31764 if (FP_REGNO_P (regno
))
31765 return VALID_FP_MODE_P (mode
);
31766 if (SSE_REGNO_P (regno
))
31768 /* We implement the move patterns for all vector modes into and
31769 out of SSE registers, even when no operation instructions
31770 are available. OImode move is available only when AVX is
31772 return ((TARGET_AVX
&& mode
== OImode
)
31773 || VALID_AVX256_REG_MODE (mode
)
31774 || VALID_SSE_REG_MODE (mode
)
31775 || VALID_SSE2_REG_MODE (mode
)
31776 || VALID_MMX_REG_MODE (mode
)
31777 || VALID_MMX_REG_MODE_3DNOW (mode
));
31779 if (MMX_REGNO_P (regno
))
31781 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31782 so if the register is available at all, then we can move data of
31783 the given mode into or out of it. */
31784 return (VALID_MMX_REG_MODE (mode
)
31785 || VALID_MMX_REG_MODE_3DNOW (mode
));
31788 if (mode
== QImode
)
31790 /* Take care for QImode values - they can be in non-QI regs,
31791 but then they do cause partial register stalls. */
31792 if (regno
<= BX_REG
|| TARGET_64BIT
)
31794 if (!TARGET_PARTIAL_REG_STALL
)
31796 return !can_create_pseudo_p ();
31798 /* We handle both integer and floats in the general purpose registers. */
31799 else if (VALID_INT_MODE_P (mode
))
31801 else if (VALID_FP_MODE_P (mode
))
31803 else if (VALID_DFP_MODE_P (mode
))
31805 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31806 on to use that value in smaller contexts, this can easily force a
31807 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31808 supporting DImode, allow it. */
31809 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
31815 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31816 tieable integer mode. */
31819 ix86_tieable_integer_mode_p (enum machine_mode mode
)
31828 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
31831 return TARGET_64BIT
;
31838 /* Return true if MODE1 is accessible in a register that can hold MODE2
31839 without copying. That is, all register classes that can hold MODE2
31840 can also hold MODE1. */
31843 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
31845 if (mode1
== mode2
)
31848 if (ix86_tieable_integer_mode_p (mode1
)
31849 && ix86_tieable_integer_mode_p (mode2
))
31852 /* MODE2 being XFmode implies fp stack or general regs, which means we
31853 can tie any smaller floating point modes to it. Note that we do not
31854 tie this with TFmode. */
31855 if (mode2
== XFmode
)
31856 return mode1
== SFmode
|| mode1
== DFmode
;
31858 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31859 that we can tie it with SFmode. */
31860 if (mode2
== DFmode
)
31861 return mode1
== SFmode
;
31863 /* If MODE2 is only appropriate for an SSE register, then tie with
31864 any other mode acceptable to SSE registers. */
31865 if (GET_MODE_SIZE (mode2
) == 32
31866 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
31867 return (GET_MODE_SIZE (mode1
) == 32
31868 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
31869 if (GET_MODE_SIZE (mode2
) == 16
31870 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
31871 return (GET_MODE_SIZE (mode1
) == 16
31872 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
31874 /* If MODE2 is appropriate for an MMX register, then tie
31875 with any other mode acceptable to MMX registers. */
31876 if (GET_MODE_SIZE (mode2
) == 8
31877 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
31878 return (GET_MODE_SIZE (mode1
) == 8
31879 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
31884 /* Return the cost of moving between two registers of mode MODE. */
31887 ix86_set_reg_reg_cost (enum machine_mode mode
)
31889 unsigned int units
= UNITS_PER_WORD
;
31891 switch (GET_MODE_CLASS (mode
))
31897 units
= GET_MODE_SIZE (CCmode
);
31901 if ((TARGET_SSE2
&& mode
== TFmode
)
31902 || (TARGET_80387
&& mode
== XFmode
)
31903 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
31904 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
31905 units
= GET_MODE_SIZE (mode
);
31908 case MODE_COMPLEX_FLOAT
:
31909 if ((TARGET_SSE2
&& mode
== TCmode
)
31910 || (TARGET_80387
&& mode
== XCmode
)
31911 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
31912 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
31913 units
= GET_MODE_SIZE (mode
);
31916 case MODE_VECTOR_INT
:
31917 case MODE_VECTOR_FLOAT
:
31918 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
31919 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
31920 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
31921 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
31922 units
= GET_MODE_SIZE (mode
);
31925 /* Return the cost of moving between two registers of mode MODE,
31926 assuming that the move will be in pieces of at most UNITS bytes. */
31927 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
31930 /* Compute a (partial) cost for rtx X. Return true if the complete
31931 cost has been computed, and false if subexpressions should be
31932 scanned. In either case, *TOTAL contains the cost result. */
31935 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int opno
, int *total
,
31938 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
31939 enum machine_mode mode
= GET_MODE (x
);
31940 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
31945 if (register_operand (SET_DEST (x
), VOIDmode
)
31946 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
31948 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
31957 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
31959 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
31961 else if (flag_pic
&& SYMBOLIC_CONST (x
)
31963 || (!GET_CODE (x
) != LABEL_REF
31964 && (GET_CODE (x
) != SYMBOL_REF
31965 || !SYMBOL_REF_LOCAL_P (x
)))))
31972 if (mode
== VOIDmode
)
31975 switch (standard_80387_constant_p (x
))
31980 default: /* Other constants */
31985 /* Start with (MEM (SYMBOL_REF)), since that's where
31986 it'll probably end up. Add a penalty for size. */
31987 *total
= (COSTS_N_INSNS (1)
31988 + (flag_pic
!= 0 && !TARGET_64BIT
)
31989 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
31995 /* The zero extensions is often completely free on x86_64, so make
31996 it as cheap as possible. */
31997 if (TARGET_64BIT
&& mode
== DImode
31998 && GET_MODE (XEXP (x
, 0)) == SImode
)
32000 else if (TARGET_ZERO_EXTEND_WITH_AND
)
32001 *total
= cost
->add
;
32003 *total
= cost
->movzx
;
32007 *total
= cost
->movsx
;
32011 if (CONST_INT_P (XEXP (x
, 1))
32012 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
32014 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
32017 *total
= cost
->add
;
32020 if ((value
== 2 || value
== 3)
32021 && cost
->lea
<= cost
->shift_const
)
32023 *total
= cost
->lea
;
32033 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
32035 if (CONST_INT_P (XEXP (x
, 1)))
32037 if (INTVAL (XEXP (x
, 1)) > 32)
32038 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
32040 *total
= cost
->shift_const
* 2;
32044 if (GET_CODE (XEXP (x
, 1)) == AND
)
32045 *total
= cost
->shift_var
* 2;
32047 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
32052 if (CONST_INT_P (XEXP (x
, 1)))
32053 *total
= cost
->shift_const
;
32055 *total
= cost
->shift_var
;
32063 gcc_assert (FLOAT_MODE_P (mode
));
32064 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
32066 /* ??? SSE scalar/vector cost should be used here. */
32067 /* ??? Bald assumption that fma has the same cost as fmul. */
32068 *total
= cost
->fmul
;
32069 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
32071 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
32073 if (GET_CODE (sub
) == NEG
)
32074 sub
= XEXP (sub
, 0);
32075 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
32078 if (GET_CODE (sub
) == NEG
)
32079 sub
= XEXP (sub
, 0);
32080 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
32085 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32087 /* ??? SSE scalar cost should be used here. */
32088 *total
= cost
->fmul
;
32091 else if (X87_FLOAT_MODE_P (mode
))
32093 *total
= cost
->fmul
;
32096 else if (FLOAT_MODE_P (mode
))
32098 /* ??? SSE vector cost should be used here. */
32099 *total
= cost
->fmul
;
32104 rtx op0
= XEXP (x
, 0);
32105 rtx op1
= XEXP (x
, 1);
32107 if (CONST_INT_P (XEXP (x
, 1)))
32109 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
32110 for (nbits
= 0; value
!= 0; value
&= value
- 1)
32114 /* This is arbitrary. */
32117 /* Compute costs correctly for widening multiplication. */
32118 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
32119 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
32120 == GET_MODE_SIZE (mode
))
32122 int is_mulwiden
= 0;
32123 enum machine_mode inner_mode
= GET_MODE (op0
);
32125 if (GET_CODE (op0
) == GET_CODE (op1
))
32126 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
32127 else if (CONST_INT_P (op1
))
32129 if (GET_CODE (op0
) == SIGN_EXTEND
)
32130 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
32133 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
32137 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
32140 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
32141 + nbits
* cost
->mult_bit
32142 + rtx_cost (op0
, outer_code
, opno
, speed
)
32143 + rtx_cost (op1
, outer_code
, opno
, speed
));
32152 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32153 /* ??? SSE cost should be used here. */
32154 *total
= cost
->fdiv
;
32155 else if (X87_FLOAT_MODE_P (mode
))
32156 *total
= cost
->fdiv
;
32157 else if (FLOAT_MODE_P (mode
))
32158 /* ??? SSE vector cost should be used here. */
32159 *total
= cost
->fdiv
;
32161 *total
= cost
->divide
[MODE_INDEX (mode
)];
32165 if (GET_MODE_CLASS (mode
) == MODE_INT
32166 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
32168 if (GET_CODE (XEXP (x
, 0)) == PLUS
32169 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
32170 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
32171 && CONSTANT_P (XEXP (x
, 1)))
32173 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
32174 if (val
== 2 || val
== 4 || val
== 8)
32176 *total
= cost
->lea
;
32177 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32178 outer_code
, opno
, speed
);
32179 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
32180 outer_code
, opno
, speed
);
32181 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32185 else if (GET_CODE (XEXP (x
, 0)) == MULT
32186 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
32188 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
32189 if (val
== 2 || val
== 4 || val
== 8)
32191 *total
= cost
->lea
;
32192 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32193 outer_code
, opno
, speed
);
32194 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32198 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
32200 *total
= cost
->lea
;
32201 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32202 outer_code
, opno
, speed
);
32203 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32204 outer_code
, opno
, speed
);
32205 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32212 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32214 /* ??? SSE cost should be used here. */
32215 *total
= cost
->fadd
;
32218 else if (X87_FLOAT_MODE_P (mode
))
32220 *total
= cost
->fadd
;
32223 else if (FLOAT_MODE_P (mode
))
32225 /* ??? SSE vector cost should be used here. */
32226 *total
= cost
->fadd
;
32234 if (!TARGET_64BIT
&& mode
== DImode
)
32236 *total
= (cost
->add
* 2
32237 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
32238 << (GET_MODE (XEXP (x
, 0)) != DImode
))
32239 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
32240 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
32246 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32248 /* ??? SSE cost should be used here. */
32249 *total
= cost
->fchs
;
32252 else if (X87_FLOAT_MODE_P (mode
))
32254 *total
= cost
->fchs
;
32257 else if (FLOAT_MODE_P (mode
))
32259 /* ??? SSE vector cost should be used here. */
32260 *total
= cost
->fchs
;
32266 if (!TARGET_64BIT
&& mode
== DImode
)
32267 *total
= cost
->add
* 2;
32269 *total
= cost
->add
;
32273 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
32274 && XEXP (XEXP (x
, 0), 1) == const1_rtx
32275 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
32276 && XEXP (x
, 1) == const0_rtx
)
32278 /* This kind of construct is implemented using test[bwl].
32279 Treat it as if we had an AND. */
32280 *total
= (cost
->add
32281 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
32282 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
32288 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
32293 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32294 /* ??? SSE cost should be used here. */
32295 *total
= cost
->fabs
;
32296 else if (X87_FLOAT_MODE_P (mode
))
32297 *total
= cost
->fabs
;
32298 else if (FLOAT_MODE_P (mode
))
32299 /* ??? SSE vector cost should be used here. */
32300 *total
= cost
->fabs
;
32304 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32305 /* ??? SSE cost should be used here. */
32306 *total
= cost
->fsqrt
;
32307 else if (X87_FLOAT_MODE_P (mode
))
32308 *total
= cost
->fsqrt
;
32309 else if (FLOAT_MODE_P (mode
))
32310 /* ??? SSE vector cost should be used here. */
32311 *total
= cost
->fsqrt
;
32315 if (XINT (x
, 1) == UNSPEC_TP
)
32322 case VEC_DUPLICATE
:
32323 /* ??? Assume all of these vector manipulation patterns are
32324 recognizable. In which case they all pretty much have the
32326 *total
= COSTS_N_INSNS (1);
32336 static int current_machopic_label_num
;
32338 /* Given a symbol name and its associated stub, write out the
32339 definition of the stub. */
32342 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
32344 unsigned int length
;
32345 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
32346 int label
= ++current_machopic_label_num
;
32348 /* For 64-bit we shouldn't get here. */
32349 gcc_assert (!TARGET_64BIT
);
32351 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32352 symb
= targetm
.strip_name_encoding (symb
);
32354 length
= strlen (stub
);
32355 binder_name
= XALLOCAVEC (char, length
+ 32);
32356 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
32358 length
= strlen (symb
);
32359 symbol_name
= XALLOCAVEC (char, length
+ 32);
32360 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
32362 sprintf (lazy_ptr_name
, "L%d$lz", label
);
32364 if (MACHOPIC_ATT_STUB
)
32365 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
32366 else if (MACHOPIC_PURE
)
32367 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
32369 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
32371 fprintf (file
, "%s:\n", stub
);
32372 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32374 if (MACHOPIC_ATT_STUB
)
32376 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
32378 else if (MACHOPIC_PURE
)
32381 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32382 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
32383 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
32384 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
32385 label
, lazy_ptr_name
, label
);
32386 fprintf (file
, "\tjmp\t*%%ecx\n");
32389 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
32391 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
32392 it needs no stub-binding-helper. */
32393 if (MACHOPIC_ATT_STUB
)
32396 fprintf (file
, "%s:\n", binder_name
);
32400 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
32401 fprintf (file
, "\tpushl\t%%ecx\n");
32404 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
32406 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
32408 /* N.B. Keep the correspondence of these
32409 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
32410 old-pic/new-pic/non-pic stubs; altering this will break
32411 compatibility with existing dylibs. */
32414 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32415 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
32418 /* 16-byte -mdynamic-no-pic stub. */
32419 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
32421 fprintf (file
, "%s:\n", lazy_ptr_name
);
32422 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32423 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
32425 #endif /* TARGET_MACHO */
32427 /* Order the registers for register allocator. */
32430 x86_order_regs_for_local_alloc (void)
32435 /* First allocate the local general purpose registers. */
32436 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32437 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
32438 reg_alloc_order
[pos
++] = i
;
32440 /* Global general purpose registers. */
32441 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32442 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
32443 reg_alloc_order
[pos
++] = i
;
32445 /* x87 registers come first in case we are doing FP math
32447 if (!TARGET_SSE_MATH
)
32448 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32449 reg_alloc_order
[pos
++] = i
;
32451 /* SSE registers. */
32452 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
32453 reg_alloc_order
[pos
++] = i
;
32454 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
32455 reg_alloc_order
[pos
++] = i
;
32457 /* x87 registers. */
32458 if (TARGET_SSE_MATH
)
32459 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32460 reg_alloc_order
[pos
++] = i
;
32462 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
32463 reg_alloc_order
[pos
++] = i
;
32465 /* Initialize the rest of array as we do not allocate some registers
32467 while (pos
< FIRST_PSEUDO_REGISTER
)
32468 reg_alloc_order
[pos
++] = 0;
32471 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
32472 in struct attribute_spec handler. */
32474 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
32476 int flags ATTRIBUTE_UNUSED
,
32477 bool *no_add_attrs
)
32479 if (TREE_CODE (*node
) != FUNCTION_TYPE
32480 && TREE_CODE (*node
) != METHOD_TYPE
32481 && TREE_CODE (*node
) != FIELD_DECL
32482 && TREE_CODE (*node
) != TYPE_DECL
)
32484 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32486 *no_add_attrs
= true;
32491 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
32493 *no_add_attrs
= true;
32496 if (is_attribute_p ("callee_pop_aggregate_return", name
))
32500 cst
= TREE_VALUE (args
);
32501 if (TREE_CODE (cst
) != INTEGER_CST
)
32503 warning (OPT_Wattributes
,
32504 "%qE attribute requires an integer constant argument",
32506 *no_add_attrs
= true;
32508 else if (compare_tree_int (cst
, 0) != 0
32509 && compare_tree_int (cst
, 1) != 0)
32511 warning (OPT_Wattributes
,
32512 "argument to %qE attribute is neither zero, nor one",
32514 *no_add_attrs
= true;
32523 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
32524 struct attribute_spec.handler. */
32526 ix86_handle_abi_attribute (tree
*node
, tree name
,
32527 tree args ATTRIBUTE_UNUSED
,
32528 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32530 if (TREE_CODE (*node
) != FUNCTION_TYPE
32531 && TREE_CODE (*node
) != METHOD_TYPE
32532 && TREE_CODE (*node
) != FIELD_DECL
32533 && TREE_CODE (*node
) != TYPE_DECL
)
32535 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32537 *no_add_attrs
= true;
32541 /* Can combine regparm with all attributes but fastcall. */
32542 if (is_attribute_p ("ms_abi", name
))
32544 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
32546 error ("ms_abi and sysv_abi attributes are not compatible");
32551 else if (is_attribute_p ("sysv_abi", name
))
32553 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
32555 error ("ms_abi and sysv_abi attributes are not compatible");
32564 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32565 struct attribute_spec.handler. */
32567 ix86_handle_struct_attribute (tree
*node
, tree name
,
32568 tree args ATTRIBUTE_UNUSED
,
32569 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32572 if (DECL_P (*node
))
32574 if (TREE_CODE (*node
) == TYPE_DECL
)
32575 type
= &TREE_TYPE (*node
);
32580 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
32582 warning (OPT_Wattributes
, "%qE attribute ignored",
32584 *no_add_attrs
= true;
32587 else if ((is_attribute_p ("ms_struct", name
)
32588 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
32589 || ((is_attribute_p ("gcc_struct", name
)
32590 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
32592 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
32594 *no_add_attrs
= true;
32601 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
32602 tree args ATTRIBUTE_UNUSED
,
32603 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32605 if (TREE_CODE (*node
) != FUNCTION_DECL
)
32607 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32609 *no_add_attrs
= true;
32615 ix86_ms_bitfield_layout_p (const_tree record_type
)
32617 return ((TARGET_MS_BITFIELD_LAYOUT
32618 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
32619 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
32622 /* Returns an expression indicating where the this parameter is
32623 located on entry to the FUNCTION. */
32626 x86_this_parameter (tree function
)
32628 tree type
= TREE_TYPE (function
);
32629 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
32634 const int *parm_regs
;
32636 if (ix86_function_type_abi (type
) == MS_ABI
)
32637 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
32639 parm_regs
= x86_64_int_parameter_registers
;
32640 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
32643 nregs
= ix86_function_regparm (type
, function
);
32645 if (nregs
> 0 && !stdarg_p (type
))
32648 unsigned int ccvt
= ix86_get_callcvt (type
);
32650 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
32651 regno
= aggr
? DX_REG
: CX_REG
;
32652 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
32656 return gen_rtx_MEM (SImode
,
32657 plus_constant (Pmode
, stack_pointer_rtx
, 4));
32666 return gen_rtx_MEM (SImode
,
32667 plus_constant (Pmode
,
32668 stack_pointer_rtx
, 4));
32671 return gen_rtx_REG (SImode
, regno
);
32674 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
32678 /* Determine whether x86_output_mi_thunk can succeed. */
32681 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
32682 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
32683 HOST_WIDE_INT vcall_offset
, const_tree function
)
32685 /* 64-bit can handle anything. */
32689 /* For 32-bit, everything's fine if we have one free register. */
32690 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
32693 /* Need a free register for vcall_offset. */
32697 /* Need a free register for GOT references. */
32698 if (flag_pic
&& !targetm
.binds_local_p (function
))
32701 /* Otherwise ok. */
32705 /* Output the assembler code for a thunk function. THUNK_DECL is the
32706 declaration for the thunk function itself, FUNCTION is the decl for
32707 the target function. DELTA is an immediate constant offset to be
32708 added to THIS. If VCALL_OFFSET is nonzero, the word at
32709 *(*this + vcall_offset) should be added to THIS. */
32712 x86_output_mi_thunk (FILE *file
,
32713 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
32714 HOST_WIDE_INT vcall_offset
, tree function
)
32716 rtx this_param
= x86_this_parameter (function
);
32717 rtx this_reg
, tmp
, fnaddr
;
32719 emit_note (NOTE_INSN_PROLOGUE_END
);
32721 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32722 pull it in now and let DELTA benefit. */
32723 if (REG_P (this_param
))
32724 this_reg
= this_param
;
32725 else if (vcall_offset
)
32727 /* Put the this parameter into %eax. */
32728 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
32729 emit_move_insn (this_reg
, this_param
);
32732 this_reg
= NULL_RTX
;
32734 /* Adjust the this parameter by a fixed constant. */
32737 rtx delta_rtx
= GEN_INT (delta
);
32738 rtx delta_dst
= this_reg
? this_reg
: this_param
;
32742 if (!x86_64_general_operand (delta_rtx
, Pmode
))
32744 tmp
= gen_rtx_REG (Pmode
, R10_REG
);
32745 emit_move_insn (tmp
, delta_rtx
);
32750 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
32753 /* Adjust the this parameter by a value stored in the vtable. */
32756 rtx vcall_addr
, vcall_mem
, this_mem
;
32757 unsigned int tmp_regno
;
32760 tmp_regno
= R10_REG
;
32763 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
32764 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
32765 tmp_regno
= AX_REG
;
32767 tmp_regno
= CX_REG
;
32769 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
32771 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
32772 if (Pmode
!= ptr_mode
)
32773 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
32774 emit_move_insn (tmp
, this_mem
);
32776 /* Adjust the this parameter. */
32777 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
32779 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
32781 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
32782 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
32783 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
32786 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
32787 if (Pmode
!= ptr_mode
)
32788 emit_insn (gen_addsi_1_zext (this_reg
,
32789 gen_rtx_REG (ptr_mode
,
32793 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
32796 /* If necessary, drop THIS back to its stack slot. */
32797 if (this_reg
&& this_reg
!= this_param
)
32798 emit_move_insn (this_param
, this_reg
);
32800 fnaddr
= XEXP (DECL_RTL (function
), 0);
32803 if (!flag_pic
|| targetm
.binds_local_p (function
)
32804 || cfun
->machine
->call_abi
== MS_ABI
)
32808 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
32809 tmp
= gen_rtx_CONST (Pmode
, tmp
);
32810 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
32815 if (!flag_pic
|| targetm
.binds_local_p (function
))
32818 else if (TARGET_MACHO
)
32820 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
32821 fnaddr
= XEXP (fnaddr
, 0);
32823 #endif /* TARGET_MACHO */
32826 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
32827 output_set_got (tmp
, NULL_RTX
);
32829 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
32830 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
32831 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
32835 /* Our sibling call patterns do not allow memories, because we have no
32836 predicate that can distinguish between frame and non-frame memory.
32837 For our purposes here, we can get away with (ab)using a jump pattern,
32838 because we're going to do no optimization. */
32839 if (MEM_P (fnaddr
))
32840 emit_jump_insn (gen_indirect_jump (fnaddr
));
32843 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
32844 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
32845 tmp
= emit_call_insn (tmp
);
32846 SIBLING_CALL_P (tmp
) = 1;
32850 /* Emit just enough of rest_of_compilation to get the insns emitted.
32851 Note that use_thunk calls assemble_start_function et al. */
32852 tmp
= get_insns ();
32853 insn_locators_alloc ();
32854 shorten_branches (tmp
);
32855 final_start_function (tmp
, file
, 1);
32856 final (tmp
, file
, 1);
32857 final_end_function ();
32861 x86_file_start (void)
32863 default_file_start ();
32865 darwin_file_start ();
32867 if (X86_FILE_START_VERSION_DIRECTIVE
)
32868 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
32869 if (X86_FILE_START_FLTUSED
)
32870 fputs ("\t.global\t__fltused\n", asm_out_file
);
32871 if (ix86_asm_dialect
== ASM_INTEL
)
32872 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
32876 x86_field_alignment (tree field
, int computed
)
32878 enum machine_mode mode
;
32879 tree type
= TREE_TYPE (field
);
32881 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
32883 mode
= TYPE_MODE (strip_array_types (type
));
32884 if (mode
== DFmode
|| mode
== DCmode
32885 || GET_MODE_CLASS (mode
) == MODE_INT
32886 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
32887 return MIN (32, computed
);
32891 /* Output assembler code to FILE to increment profiler label # LABELNO
32892 for profiling a function entry. */
32894 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
32896 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
32901 #ifndef NO_PROFILE_COUNTERS
32902 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
32905 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
32906 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
32908 fprintf (file
, "\tcall\t%s\n", mcount_name
);
32912 #ifndef NO_PROFILE_COUNTERS
32913 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
32916 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
32920 #ifndef NO_PROFILE_COUNTERS
32921 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
32924 fprintf (file
, "\tcall\t%s\n", mcount_name
);
32928 /* We don't have exact information about the insn sizes, but we may assume
32929 quite safely that we are informed about all 1 byte insns and memory
32930 address sizes. This is enough to eliminate unnecessary padding in
32934 min_insn_size (rtx insn
)
32938 if (!INSN_P (insn
) || !active_insn_p (insn
))
32941 /* Discard alignments we've emit and jump instructions. */
32942 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
32943 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
32945 if (JUMP_TABLE_DATA_P (insn
))
32948 /* Important case - calls are always 5 bytes.
32949 It is common to have many calls in the row. */
32951 && symbolic_reference_mentioned_p (PATTERN (insn
))
32952 && !SIBLING_CALL_P (insn
))
32954 len
= get_attr_length (insn
);
32958 /* For normal instructions we rely on get_attr_length being exact,
32959 with a few exceptions. */
32960 if (!JUMP_P (insn
))
32962 enum attr_type type
= get_attr_type (insn
);
32967 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
32968 || asm_noperands (PATTERN (insn
)) >= 0)
32975 /* Otherwise trust get_attr_length. */
32979 l
= get_attr_length_address (insn
);
32980 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
32989 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32991 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
32995 ix86_avoid_jump_mispredicts (void)
32997 rtx insn
, start
= get_insns ();
32998 int nbytes
= 0, njumps
= 0;
33001 /* Look for all minimal intervals of instructions containing 4 jumps.
33002 The intervals are bounded by START and INSN. NBYTES is the total
33003 size of instructions in the interval including INSN and not including
33004 START. When the NBYTES is smaller than 16 bytes, it is possible
33005 that the end of START and INSN ends up in the same 16byte page.
33007 The smallest offset in the page INSN can start is the case where START
33008 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
33009 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
33011 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
33015 if (LABEL_P (insn
))
33017 int align
= label_to_alignment (insn
);
33018 int max_skip
= label_to_max_skip (insn
);
33022 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
33023 already in the current 16 byte page, because otherwise
33024 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
33025 bytes to reach 16 byte boundary. */
33027 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
33030 fprintf (dump_file
, "Label %i with max_skip %i\n",
33031 INSN_UID (insn
), max_skip
);
33034 while (nbytes
+ max_skip
>= 16)
33036 start
= NEXT_INSN (start
);
33037 if ((JUMP_P (start
)
33038 && GET_CODE (PATTERN (start
)) != ADDR_VEC
33039 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
33041 njumps
--, isjump
= 1;
33044 nbytes
-= min_insn_size (start
);
33050 min_size
= min_insn_size (insn
);
33051 nbytes
+= min_size
;
33053 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
33054 INSN_UID (insn
), min_size
);
33056 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
33057 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
33065 start
= NEXT_INSN (start
);
33066 if ((JUMP_P (start
)
33067 && GET_CODE (PATTERN (start
)) != ADDR_VEC
33068 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
33070 njumps
--, isjump
= 1;
33073 nbytes
-= min_insn_size (start
);
33075 gcc_assert (njumps
>= 0);
33077 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
33078 INSN_UID (start
), INSN_UID (insn
), nbytes
);
33080 if (njumps
== 3 && isjump
&& nbytes
< 16)
33082 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
33085 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
33086 INSN_UID (insn
), padsize
);
33087 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
33093 /* AMD Athlon works faster
33094 when RET is not destination of conditional jump or directly preceded
33095 by other jump instruction. We avoid the penalty by inserting NOP just
33096 before the RET instructions in such cases. */
33098 ix86_pad_returns (void)
33103 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33105 basic_block bb
= e
->src
;
33106 rtx ret
= BB_END (bb
);
33108 bool replace
= false;
33110 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
33111 || optimize_bb_for_size_p (bb
))
33113 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
33114 if (active_insn_p (prev
) || LABEL_P (prev
))
33116 if (prev
&& LABEL_P (prev
))
33121 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33122 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
33123 && !(e
->flags
& EDGE_FALLTHRU
))
33128 prev
= prev_active_insn (ret
);
33130 && ((JUMP_P (prev
) && any_condjump_p (prev
))
33133 /* Empty functions get branch mispredict even when
33134 the jump destination is not visible to us. */
33135 if (!prev
&& !optimize_function_for_size_p (cfun
))
33140 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
33146 /* Count the minimum number of instructions in BB. Return 4 if the
33147 number of instructions >= 4. */
33150 ix86_count_insn_bb (basic_block bb
)
33153 int insn_count
= 0;
33155 /* Count number of instructions in this block. Return 4 if the number
33156 of instructions >= 4. */
33157 FOR_BB_INSNS (bb
, insn
)
33159 /* Only happen in exit blocks. */
33161 && ANY_RETURN_P (PATTERN (insn
)))
33164 if (NONDEBUG_INSN_P (insn
)
33165 && GET_CODE (PATTERN (insn
)) != USE
33166 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
33169 if (insn_count
>= 4)
33178 /* Count the minimum number of instructions in code path in BB.
33179 Return 4 if the number of instructions >= 4. */
33182 ix86_count_insn (basic_block bb
)
33186 int min_prev_count
;
33188 /* Only bother counting instructions along paths with no
33189 more than 2 basic blocks between entry and exit. Given
33190 that BB has an edge to exit, determine if a predecessor
33191 of BB has an edge from entry. If so, compute the number
33192 of instructions in the predecessor block. If there
33193 happen to be multiple such blocks, compute the minimum. */
33194 min_prev_count
= 4;
33195 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33198 edge_iterator prev_ei
;
33200 if (e
->src
== ENTRY_BLOCK_PTR
)
33202 min_prev_count
= 0;
33205 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
33207 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
33209 int count
= ix86_count_insn_bb (e
->src
);
33210 if (count
< min_prev_count
)
33211 min_prev_count
= count
;
33217 if (min_prev_count
< 4)
33218 min_prev_count
+= ix86_count_insn_bb (bb
);
33220 return min_prev_count
;
33223 /* Pad short function to 4 instructions. */
33226 ix86_pad_short_function (void)
33231 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33233 rtx ret
= BB_END (e
->src
);
33234 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
33236 int insn_count
= ix86_count_insn (e
->src
);
33238 /* Pad short function. */
33239 if (insn_count
< 4)
33243 /* Find epilogue. */
33246 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
33247 insn
= PREV_INSN (insn
);
33252 /* Two NOPs count as one instruction. */
33253 insn_count
= 2 * (4 - insn_count
);
33254 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
33260 /* Implement machine specific optimizations. We implement padding of returns
33261 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
33265 /* We are freeing block_for_insn in the toplev to keep compatibility
33266 with old MDEP_REORGS that are not CFG based. Recompute it now. */
33267 compute_bb_for_insn ();
33269 /* Run the vzeroupper optimization if needed. */
33270 if (TARGET_VZEROUPPER
)
33271 move_or_delete_vzeroupper ();
33273 if (optimize
&& optimize_function_for_speed_p (cfun
))
33275 if (TARGET_PAD_SHORT_FUNCTION
)
33276 ix86_pad_short_function ();
33277 else if (TARGET_PAD_RETURNS
)
33278 ix86_pad_returns ();
33279 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
33280 if (TARGET_FOUR_JUMP_LIMIT
)
33281 ix86_avoid_jump_mispredicts ();
33286 /* Return nonzero when QImode register that must be represented via REX prefix
33289 x86_extended_QIreg_mentioned_p (rtx insn
)
33292 extract_insn_cached (insn
);
33293 for (i
= 0; i
< recog_data
.n_operands
; i
++)
33294 if (REG_P (recog_data
.operand
[i
])
33295 && REGNO (recog_data
.operand
[i
]) > BX_REG
)
33300 /* Return nonzero when P points to register encoded via REX prefix.
33301 Called via for_each_rtx. */
33303 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
33305 unsigned int regno
;
33308 regno
= REGNO (*p
);
33309 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
33312 /* Return true when INSN mentions register that must be encoded using REX
33315 x86_extended_reg_mentioned_p (rtx insn
)
33317 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
33318 extended_reg_mentioned_1
, NULL
);
33321 /* If profitable, negate (without causing overflow) integer constant
33322 of mode MODE at location LOC. Return true in this case. */
33324 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
33328 if (!CONST_INT_P (*loc
))
33334 /* DImode x86_64 constants must fit in 32 bits. */
33335 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
33346 gcc_unreachable ();
33349 /* Avoid overflows. */
33350 if (mode_signbit_p (mode
, *loc
))
33353 val
= INTVAL (*loc
);
33355 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
33356 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
33357 if ((val
< 0 && val
!= -128)
33360 *loc
= GEN_INT (-val
);
33367 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
33368 optabs would emit if we didn't have TFmode patterns. */
33371 x86_emit_floatuns (rtx operands
[2])
33373 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
33374 enum machine_mode mode
, inmode
;
33376 inmode
= GET_MODE (operands
[1]);
33377 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
33380 in
= force_reg (inmode
, operands
[1]);
33381 mode
= GET_MODE (out
);
33382 neglab
= gen_label_rtx ();
33383 donelab
= gen_label_rtx ();
33384 f0
= gen_reg_rtx (mode
);
33386 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
33388 expand_float (out
, in
, 0);
33390 emit_jump_insn (gen_jump (donelab
));
33393 emit_label (neglab
);
33395 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
33397 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
33399 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
33401 expand_float (f0
, i0
, 0);
33403 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
33405 emit_label (donelab
);
33408 /* AVX2 does support 32-byte integer vector operations,
33409 thus the longest vector we are faced with is V32QImode. */
33410 #define MAX_VECT_LEN 32
33412 struct expand_vec_perm_d
33414 rtx target
, op0
, op1
;
33415 unsigned char perm
[MAX_VECT_LEN
];
33416 enum machine_mode vmode
;
33417 unsigned char nelt
;
33418 bool one_operand_p
;
33422 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
33423 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
33424 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
33426 /* Get a vector mode of the same size as the original but with elements
33427 twice as wide. This is only guaranteed to apply to integral vectors. */
33429 static inline enum machine_mode
33430 get_mode_wider_vector (enum machine_mode o
)
33432 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
33433 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
33434 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
33435 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
33439 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33440 with all elements equal to VAR. Return true if successful. */
33443 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
33444 rtx target
, rtx val
)
33467 /* First attempt to recognize VAL as-is. */
33468 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
33469 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
33470 if (recog_memoized (insn
) < 0)
33473 /* If that fails, force VAL into a register. */
33476 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
33477 seq
= get_insns ();
33480 emit_insn_before (seq
, insn
);
33482 ok
= recog_memoized (insn
) >= 0;
33491 if (TARGET_SSE
|| TARGET_3DNOW_A
)
33495 val
= gen_lowpart (SImode
, val
);
33496 x
= gen_rtx_TRUNCATE (HImode
, val
);
33497 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
33498 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33511 struct expand_vec_perm_d dperm
;
33515 memset (&dperm
, 0, sizeof (dperm
));
33516 dperm
.target
= target
;
33517 dperm
.vmode
= mode
;
33518 dperm
.nelt
= GET_MODE_NUNITS (mode
);
33519 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
33520 dperm
.one_operand_p
= true;
33522 /* Extend to SImode using a paradoxical SUBREG. */
33523 tmp1
= gen_reg_rtx (SImode
);
33524 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
33526 /* Insert the SImode value as low element of a V4SImode vector. */
33527 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
33528 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
33530 ok
= (expand_vec_perm_1 (&dperm
)
33531 || expand_vec_perm_broadcast_1 (&dperm
));
33543 /* Replicate the value once into the next wider mode and recurse. */
33545 enum machine_mode smode
, wsmode
, wvmode
;
33548 smode
= GET_MODE_INNER (mode
);
33549 wvmode
= get_mode_wider_vector (mode
);
33550 wsmode
= GET_MODE_INNER (wvmode
);
33552 val
= convert_modes (wsmode
, smode
, val
, true);
33553 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
33554 GEN_INT (GET_MODE_BITSIZE (smode
)),
33555 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
33556 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
33558 x
= gen_lowpart (wvmode
, target
);
33559 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
33567 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
33568 rtx x
= gen_reg_rtx (hvmode
);
33570 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
33573 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
33574 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33583 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33584 whose ONE_VAR element is VAR, and other elements are zero. Return true
33588 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
33589 rtx target
, rtx var
, int one_var
)
33591 enum machine_mode vsimode
;
33594 bool use_vector_set
= false;
33599 /* For SSE4.1, we normally use vector set. But if the second
33600 element is zero and inter-unit moves are OK, we use movq
33602 use_vector_set
= (TARGET_64BIT
33604 && !(TARGET_INTER_UNIT_MOVES
33610 use_vector_set
= TARGET_SSE4_1
;
33613 use_vector_set
= TARGET_SSE2
;
33616 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
33623 use_vector_set
= TARGET_AVX
;
33626 /* Use ix86_expand_vector_set in 64bit mode only. */
33627 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
33633 if (use_vector_set
)
33635 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
33636 var
= force_reg (GET_MODE_INNER (mode
), var
);
33637 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
33653 var
= force_reg (GET_MODE_INNER (mode
), var
);
33654 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
33655 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33660 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
33661 new_target
= gen_reg_rtx (mode
);
33663 new_target
= target
;
33664 var
= force_reg (GET_MODE_INNER (mode
), var
);
33665 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
33666 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
33667 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
33670 /* We need to shuffle the value to the correct position, so
33671 create a new pseudo to store the intermediate result. */
33673 /* With SSE2, we can use the integer shuffle insns. */
33674 if (mode
!= V4SFmode
&& TARGET_SSE2
)
33676 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
33678 GEN_INT (one_var
== 1 ? 0 : 1),
33679 GEN_INT (one_var
== 2 ? 0 : 1),
33680 GEN_INT (one_var
== 3 ? 0 : 1)));
33681 if (target
!= new_target
)
33682 emit_move_insn (target
, new_target
);
33686 /* Otherwise convert the intermediate result to V4SFmode and
33687 use the SSE1 shuffle instructions. */
33688 if (mode
!= V4SFmode
)
33690 tmp
= gen_reg_rtx (V4SFmode
);
33691 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
33696 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
33698 GEN_INT (one_var
== 1 ? 0 : 1),
33699 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
33700 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
33702 if (mode
!= V4SFmode
)
33703 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
33704 else if (tmp
!= target
)
33705 emit_move_insn (target
, tmp
);
33707 else if (target
!= new_target
)
33708 emit_move_insn (target
, new_target
);
33713 vsimode
= V4SImode
;
33719 vsimode
= V2SImode
;
33725 /* Zero extend the variable element to SImode and recurse. */
33726 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
33728 x
= gen_reg_rtx (vsimode
);
33729 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
33731 gcc_unreachable ();
33733 emit_move_insn (target
, gen_lowpart (mode
, x
));
33741 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33742 consisting of the values in VALS. It is known that all elements
33743 except ONE_VAR are constants. Return true if successful. */
33746 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
33747 rtx target
, rtx vals
, int one_var
)
33749 rtx var
= XVECEXP (vals
, 0, one_var
);
33750 enum machine_mode wmode
;
33753 const_vec
= copy_rtx (vals
);
33754 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
33755 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
33763 /* For the two element vectors, it's just as easy to use
33764 the general case. */
33768 /* Use ix86_expand_vector_set in 64bit mode only. */
33791 /* There's no way to set one QImode entry easily. Combine
33792 the variable value with its adjacent constant value, and
33793 promote to an HImode set. */
33794 x
= XVECEXP (vals
, 0, one_var
^ 1);
33797 var
= convert_modes (HImode
, QImode
, var
, true);
33798 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
33799 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
33800 x
= GEN_INT (INTVAL (x
) & 0xff);
33804 var
= convert_modes (HImode
, QImode
, var
, true);
33805 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
33807 if (x
!= const0_rtx
)
33808 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
33809 1, OPTAB_LIB_WIDEN
);
33811 x
= gen_reg_rtx (wmode
);
33812 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
33813 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
33815 emit_move_insn (target
, gen_lowpart (mode
, x
));
33822 emit_move_insn (target
, const_vec
);
33823 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
33827 /* A subroutine of ix86_expand_vector_init_general. Use vector
33828 concatenate to handle the most general case: all values variable,
33829 and none identical. */
33832 ix86_expand_vector_init_concat (enum machine_mode mode
,
33833 rtx target
, rtx
*ops
, int n
)
33835 enum machine_mode cmode
, hmode
= VOIDmode
;
33836 rtx first
[8], second
[4];
33876 gcc_unreachable ();
33879 if (!register_operand (ops
[1], cmode
))
33880 ops
[1] = force_reg (cmode
, ops
[1]);
33881 if (!register_operand (ops
[0], cmode
))
33882 ops
[0] = force_reg (cmode
, ops
[0]);
33883 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33884 gen_rtx_VEC_CONCAT (mode
, ops
[0],
33904 gcc_unreachable ();
33920 gcc_unreachable ();
33925 /* FIXME: We process inputs backward to help RA. PR 36222. */
33928 for (; i
> 0; i
-= 2, j
--)
33930 first
[j
] = gen_reg_rtx (cmode
);
33931 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
33932 ix86_expand_vector_init (false, first
[j
],
33933 gen_rtx_PARALLEL (cmode
, v
));
33939 gcc_assert (hmode
!= VOIDmode
);
33940 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
33942 second
[j
] = gen_reg_rtx (hmode
);
33943 ix86_expand_vector_init_concat (hmode
, second
[j
],
33947 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
33950 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
33954 gcc_unreachable ();
33958 /* A subroutine of ix86_expand_vector_init_general. Use vector
33959 interleave to handle the most general case: all values variable,
33960 and none identical. */
33963 ix86_expand_vector_init_interleave (enum machine_mode mode
,
33964 rtx target
, rtx
*ops
, int n
)
33966 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
33969 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
33970 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
33971 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
33976 gen_load_even
= gen_vec_setv8hi
;
33977 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
33978 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
33979 inner_mode
= HImode
;
33980 first_imode
= V4SImode
;
33981 second_imode
= V2DImode
;
33982 third_imode
= VOIDmode
;
33985 gen_load_even
= gen_vec_setv16qi
;
33986 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
33987 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
33988 inner_mode
= QImode
;
33989 first_imode
= V8HImode
;
33990 second_imode
= V4SImode
;
33991 third_imode
= V2DImode
;
33994 gcc_unreachable ();
33997 for (i
= 0; i
< n
; i
++)
33999 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
34000 op0
= gen_reg_rtx (SImode
);
34001 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
34003 /* Insert the SImode value as low element of V4SImode vector. */
34004 op1
= gen_reg_rtx (V4SImode
);
34005 op0
= gen_rtx_VEC_MERGE (V4SImode
,
34006 gen_rtx_VEC_DUPLICATE (V4SImode
,
34008 CONST0_RTX (V4SImode
),
34010 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
34012 /* Cast the V4SImode vector back to a vector in orignal mode. */
34013 op0
= gen_reg_rtx (mode
);
34014 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
34016 /* Load even elements into the second positon. */
34017 emit_insn (gen_load_even (op0
,
34018 force_reg (inner_mode
,
34022 /* Cast vector to FIRST_IMODE vector. */
34023 ops
[i
] = gen_reg_rtx (first_imode
);
34024 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
34027 /* Interleave low FIRST_IMODE vectors. */
34028 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
34030 op0
= gen_reg_rtx (first_imode
);
34031 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
34033 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
34034 ops
[j
] = gen_reg_rtx (second_imode
);
34035 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
34038 /* Interleave low SECOND_IMODE vectors. */
34039 switch (second_imode
)
34042 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
34044 op0
= gen_reg_rtx (second_imode
);
34045 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
34048 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
34050 ops
[j
] = gen_reg_rtx (third_imode
);
34051 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
34053 second_imode
= V2DImode
;
34054 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
34058 op0
= gen_reg_rtx (second_imode
);
34059 emit_insn (gen_interleave_second_low (op0
, ops
[0],
34062 /* Cast the SECOND_IMODE vector back to a vector on original
34064 emit_insn (gen_rtx_SET (VOIDmode
, target
,
34065 gen_lowpart (mode
, op0
)));
34069 gcc_unreachable ();
34073 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
34074 all values variable, and none identical. */
34077 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
34078 rtx target
, rtx vals
)
34080 rtx ops
[32], op0
, op1
;
34081 enum machine_mode half_mode
= VOIDmode
;
34088 if (!mmx_ok
&& !TARGET_SSE
)
34100 n
= GET_MODE_NUNITS (mode
);
34101 for (i
= 0; i
< n
; i
++)
34102 ops
[i
] = XVECEXP (vals
, 0, i
);
34103 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
34107 half_mode
= V16QImode
;
34111 half_mode
= V8HImode
;
34115 n
= GET_MODE_NUNITS (mode
);
34116 for (i
= 0; i
< n
; i
++)
34117 ops
[i
] = XVECEXP (vals
, 0, i
);
34118 op0
= gen_reg_rtx (half_mode
);
34119 op1
= gen_reg_rtx (half_mode
);
34120 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
34122 ix86_expand_vector_init_interleave (half_mode
, op1
,
34123 &ops
[n
>> 1], n
>> 2);
34124 emit_insn (gen_rtx_SET (VOIDmode
, target
,
34125 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
34129 if (!TARGET_SSE4_1
)
34137 /* Don't use ix86_expand_vector_init_interleave if we can't
34138 move from GPR to SSE register directly. */
34139 if (!TARGET_INTER_UNIT_MOVES
)
34142 n
= GET_MODE_NUNITS (mode
);
34143 for (i
= 0; i
< n
; i
++)
34144 ops
[i
] = XVECEXP (vals
, 0, i
);
34145 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
34153 gcc_unreachable ();
34157 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
34158 enum machine_mode inner_mode
;
34159 rtx words
[4], shift
;
34161 inner_mode
= GET_MODE_INNER (mode
);
34162 n_elts
= GET_MODE_NUNITS (mode
);
34163 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
34164 n_elt_per_word
= n_elts
/ n_words
;
34165 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
34167 for (i
= 0; i
< n_words
; ++i
)
34169 rtx word
= NULL_RTX
;
34171 for (j
= 0; j
< n_elt_per_word
; ++j
)
34173 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
34174 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
34180 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
34181 word
, 1, OPTAB_LIB_WIDEN
);
34182 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
34183 word
, 1, OPTAB_LIB_WIDEN
);
34191 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
34192 else if (n_words
== 2)
34194 rtx tmp
= gen_reg_rtx (mode
);
34195 emit_clobber (tmp
);
34196 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
34197 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
34198 emit_move_insn (target
, tmp
);
34200 else if (n_words
== 4)
34202 rtx tmp
= gen_reg_rtx (V4SImode
);
34203 gcc_assert (word_mode
== SImode
);
34204 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
34205 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
34206 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
34209 gcc_unreachable ();
34213 /* Initialize vector TARGET via VALS. Suppress the use of MMX
34214 instructions unless MMX_OK is true. */
34217 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
34219 enum machine_mode mode
= GET_MODE (target
);
34220 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34221 int n_elts
= GET_MODE_NUNITS (mode
);
34222 int n_var
= 0, one_var
= -1;
34223 bool all_same
= true, all_const_zero
= true;
34227 for (i
= 0; i
< n_elts
; ++i
)
34229 x
= XVECEXP (vals
, 0, i
);
34230 if (!(CONST_INT_P (x
)
34231 || GET_CODE (x
) == CONST_DOUBLE
34232 || GET_CODE (x
) == CONST_FIXED
))
34233 n_var
++, one_var
= i
;
34234 else if (x
!= CONST0_RTX (inner_mode
))
34235 all_const_zero
= false;
34236 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
34240 /* Constants are best loaded from the constant pool. */
34243 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
34247 /* If all values are identical, broadcast the value. */
34249 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
34250 XVECEXP (vals
, 0, 0)))
34253 /* Values where only one field is non-constant are best loaded from
34254 the pool and overwritten via move later. */
34258 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
34259 XVECEXP (vals
, 0, one_var
),
34263 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
34267 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
34271 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
34273 enum machine_mode mode
= GET_MODE (target
);
34274 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34275 enum machine_mode half_mode
;
34276 bool use_vec_merge
= false;
34278 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
34280 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
34281 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
34282 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
34283 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
34284 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
34285 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
34287 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
34289 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
34290 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
34291 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
34292 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
34293 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
34294 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
34304 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34305 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
34307 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34309 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34310 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34316 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
34320 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34321 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
34323 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34325 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34326 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34333 /* For the two element vectors, we implement a VEC_CONCAT with
34334 the extraction of the other element. */
34336 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
34337 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
34340 op0
= val
, op1
= tmp
;
34342 op0
= tmp
, op1
= val
;
34344 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
34345 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34350 use_vec_merge
= TARGET_SSE4_1
;
34357 use_vec_merge
= true;
34361 /* tmp = target = A B C D */
34362 tmp
= copy_to_reg (target
);
34363 /* target = A A B B */
34364 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
34365 /* target = X A B B */
34366 ix86_expand_vector_set (false, target
, val
, 0);
34367 /* target = A X C D */
34368 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34369 const1_rtx
, const0_rtx
,
34370 GEN_INT (2+4), GEN_INT (3+4)));
34374 /* tmp = target = A B C D */
34375 tmp
= copy_to_reg (target
);
34376 /* tmp = X B C D */
34377 ix86_expand_vector_set (false, tmp
, val
, 0);
34378 /* target = A B X D */
34379 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34380 const0_rtx
, const1_rtx
,
34381 GEN_INT (0+4), GEN_INT (3+4)));
34385 /* tmp = target = A B C D */
34386 tmp
= copy_to_reg (target
);
34387 /* tmp = X B C D */
34388 ix86_expand_vector_set (false, tmp
, val
, 0);
34389 /* target = A B X D */
34390 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34391 const0_rtx
, const1_rtx
,
34392 GEN_INT (2+4), GEN_INT (0+4)));
34396 gcc_unreachable ();
34401 use_vec_merge
= TARGET_SSE4_1
;
34405 /* Element 0 handled by vec_merge below. */
34408 use_vec_merge
= true;
34414 /* With SSE2, use integer shuffles to swap element 0 and ELT,
34415 store into element 0, then shuffle them back. */
34419 order
[0] = GEN_INT (elt
);
34420 order
[1] = const1_rtx
;
34421 order
[2] = const2_rtx
;
34422 order
[3] = GEN_INT (3);
34423 order
[elt
] = const0_rtx
;
34425 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34426 order
[1], order
[2], order
[3]));
34428 ix86_expand_vector_set (false, target
, val
, 0);
34430 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34431 order
[1], order
[2], order
[3]));
34435 /* For SSE1, we have to reuse the V4SF code. */
34436 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
34437 gen_lowpart (SFmode
, val
), elt
);
34442 use_vec_merge
= TARGET_SSE2
;
34445 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34449 use_vec_merge
= TARGET_SSE4_1
;
34456 half_mode
= V16QImode
;
34462 half_mode
= V8HImode
;
34468 half_mode
= V4SImode
;
34474 half_mode
= V2DImode
;
34480 half_mode
= V4SFmode
;
34486 half_mode
= V2DFmode
;
34492 /* Compute offset. */
34496 gcc_assert (i
<= 1);
34498 /* Extract the half. */
34499 tmp
= gen_reg_rtx (half_mode
);
34500 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
34502 /* Put val in tmp at elt. */
34503 ix86_expand_vector_set (false, tmp
, val
, elt
);
34506 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
34515 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
34516 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
34517 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34521 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
34523 emit_move_insn (mem
, target
);
34525 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
34526 emit_move_insn (tmp
, val
);
34528 emit_move_insn (target
, mem
);
34533 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
34535 enum machine_mode mode
= GET_MODE (vec
);
34536 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34537 bool use_vec_extr
= false;
34550 use_vec_extr
= true;
34554 use_vec_extr
= TARGET_SSE4_1
;
34566 tmp
= gen_reg_rtx (mode
);
34567 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
34568 GEN_INT (elt
), GEN_INT (elt
),
34569 GEN_INT (elt
+4), GEN_INT (elt
+4)));
34573 tmp
= gen_reg_rtx (mode
);
34574 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
34578 gcc_unreachable ();
34581 use_vec_extr
= true;
34586 use_vec_extr
= TARGET_SSE4_1
;
34600 tmp
= gen_reg_rtx (mode
);
34601 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
34602 GEN_INT (elt
), GEN_INT (elt
),
34603 GEN_INT (elt
), GEN_INT (elt
)));
34607 tmp
= gen_reg_rtx (mode
);
34608 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
34612 gcc_unreachable ();
34615 use_vec_extr
= true;
34620 /* For SSE1, we have to reuse the V4SF code. */
34621 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
34622 gen_lowpart (V4SFmode
, vec
), elt
);
34628 use_vec_extr
= TARGET_SSE2
;
34631 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34635 use_vec_extr
= TARGET_SSE4_1
;
34641 tmp
= gen_reg_rtx (V4SFmode
);
34643 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
34645 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
34646 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
34654 tmp
= gen_reg_rtx (V2DFmode
);
34656 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
34658 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
34659 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
34667 tmp
= gen_reg_rtx (V16QImode
);
34669 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
34671 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
34672 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
34680 tmp
= gen_reg_rtx (V8HImode
);
34682 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
34684 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
34685 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
34693 tmp
= gen_reg_rtx (V4SImode
);
34695 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
34697 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
34698 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
34706 tmp
= gen_reg_rtx (V2DImode
);
34708 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
34710 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
34711 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
34717 /* ??? Could extract the appropriate HImode element and shift. */
34724 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
34725 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
34727 /* Let the rtl optimizers know about the zero extension performed. */
34728 if (inner_mode
== QImode
|| inner_mode
== HImode
)
34730 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
34731 target
= gen_lowpart (SImode
, target
);
34734 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34738 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
34740 emit_move_insn (mem
, vec
);
34742 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
34743 emit_move_insn (target
, tmp
);
34747 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34748 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34749 The upper bits of DEST are undefined, though they shouldn't cause
34750 exceptions (some bits from src or all zeros are ok). */
34753 emit_reduc_half (rtx dest
, rtx src
, int i
)
34756 switch (GET_MODE (src
))
34760 tem
= gen_sse_movhlps (dest
, src
, src
);
34762 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
34763 GEN_INT (1 + 4), GEN_INT (1 + 4));
34766 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
34772 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
34773 gen_lowpart (V1TImode
, src
),
34778 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
34780 tem
= gen_avx_shufps256 (dest
, src
, src
,
34781 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
34785 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
34787 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
34794 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
34795 gen_lowpart (V4DImode
, src
),
34796 gen_lowpart (V4DImode
, src
),
34799 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
34800 gen_lowpart (V2TImode
, src
),
34804 gcc_unreachable ();
34809 /* Expand a vector reduction. FN is the binary pattern to reduce;
34810 DEST is the destination; IN is the input vector. */
34813 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
34815 rtx half
, dst
, vec
= in
;
34816 enum machine_mode mode
= GET_MODE (in
);
34819 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34821 && mode
== V8HImode
34822 && fn
== gen_uminv8hi3
)
34824 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
34828 for (i
= GET_MODE_BITSIZE (mode
);
34829 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
34832 half
= gen_reg_rtx (mode
);
34833 emit_reduc_half (half
, vec
, i
);
34834 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
34837 dst
= gen_reg_rtx (mode
);
34838 emit_insn (fn (dst
, half
, vec
));
34843 /* Target hook for scalar_mode_supported_p. */
34845 ix86_scalar_mode_supported_p (enum machine_mode mode
)
34847 if (DECIMAL_FLOAT_MODE_P (mode
))
34848 return default_decimal_float_supported_p ();
34849 else if (mode
== TFmode
)
34852 return default_scalar_mode_supported_p (mode
);
34855 /* Implements target hook vector_mode_supported_p. */
34857 ix86_vector_mode_supported_p (enum machine_mode mode
)
34859 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
34861 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
34863 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34865 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
34867 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
34872 /* Target hook for c_mode_for_suffix. */
34873 static enum machine_mode
34874 ix86_c_mode_for_suffix (char suffix
)
34884 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34886 We do this in the new i386 backend to maintain source compatibility
34887 with the old cc0-based compiler. */
34890 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
34891 tree inputs ATTRIBUTE_UNUSED
,
34894 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
34896 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
34901 /* Implements target vector targetm.asm.encode_section_info. */
34903 static void ATTRIBUTE_UNUSED
34904 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
34906 default_encode_section_info (decl
, rtl
, first
);
34908 if (TREE_CODE (decl
) == VAR_DECL
34909 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
34910 && ix86_in_large_data_p (decl
))
34911 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
34914 /* Worker function for REVERSE_CONDITION. */
34917 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
34919 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
34920 ? reverse_condition (code
)
34921 : reverse_condition_maybe_unordered (code
));
34924 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* NOTE(review): this chunk is a garbled extraction -- the function's
   return-type line, braces and the final fallthrough branch (original
   lines 34925-34927, 34929, 34931, 34934, 34938, 34941-34942, 34944,
   34948-34949, 34954, 34956 onward) are missing from view.  Code tokens
   below are left byte-identical; only comments were added.  */
34928 output_387_reg_move (rtx insn
, rtx
*operands
)
/* Case: destination is an x87 register.  */
34930 if (REG_P (operands
[0]))
/* Source register dies at this insn: emit a popping move.  */
34932 if (REG_P (operands
[1])
34933 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
34935 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
34936 return output_387_ffreep (operands
, 0);
34937 return "fstp\t%y0";
/* Destination is the stack top: load the source onto it.  */
34939 if (STACK_TOP_P (operands
[0]))
34940 return "fld%Z1\t%y1";
/* Case: destination is memory; source must be a register.  */
34943 else if (MEM_P (operands
[0]))
34945 gcc_assert (REG_P (operands
[1]));
/* Dying source: use a popping store.  */
34946 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
34947 return "fstp%Z0\t%y0";
34950 /* There is no non-popping store to memory for XFmode.
34951 So if we need one, follow the store with a load. */
34952 if (GET_MODE (operands
[0]) == XFmode
)
34953 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34955 return "fst%Z0\t%y0";
34962 /* Output code to perform a conditional jump to LABEL, if C2 flag in
34963 FP status register is set. */
34966 ix86_emit_fp_unordered_jump (rtx label
)
34968 rtx reg
= gen_reg_rtx (HImode
);
34971 emit_insn (gen_x86_fnstsw_1 (reg
));
34973 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
34975 emit_insn (gen_x86_sahf_1 (reg
));
34977 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
34978 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
34982 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
34984 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
34985 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
34988 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
34989 gen_rtx_LABEL_REF (VOIDmode
, label
),
34991 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
34993 emit_jump_insn (temp
);
34994 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
34997 /* Output code to perform a log1p XFmode calculation. */
34999 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
35001 rtx label1
= gen_label_rtx ();
35002 rtx label2
= gen_label_rtx ();
35004 rtx tmp
= gen_reg_rtx (XFmode
);
35005 rtx tmp2
= gen_reg_rtx (XFmode
);
35008 emit_insn (gen_absxf2 (tmp
, op1
));
35009 test
= gen_rtx_GE (VOIDmode
, tmp
,
35010 CONST_DOUBLE_FROM_REAL_VALUE (
35011 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
35013 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
35015 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
35016 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
35017 emit_jump (label2
);
35019 emit_label (label1
);
35020 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
35021 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
35022 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
35023 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
35025 emit_label (label2
);
35028 /* Emit code for round calculation. */
35029 void ix86_emit_i387_round (rtx op0
, rtx op1
)
35031 enum machine_mode inmode
= GET_MODE (op1
);
35032 enum machine_mode outmode
= GET_MODE (op0
);
35033 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
35034 rtx scratch
= gen_reg_rtx (HImode
);
35035 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
35036 rtx jump_label
= gen_label_rtx ();
35038 rtx (*gen_abs
) (rtx
, rtx
);
35039 rtx (*gen_neg
) (rtx
, rtx
);
35044 gen_abs
= gen_abssf2
;
35047 gen_abs
= gen_absdf2
;
35050 gen_abs
= gen_absxf2
;
35053 gcc_unreachable ();
35059 gen_neg
= gen_negsf2
;
35062 gen_neg
= gen_negdf2
;
35065 gen_neg
= gen_negxf2
;
35068 gen_neg
= gen_neghi2
;
35071 gen_neg
= gen_negsi2
;
35074 gen_neg
= gen_negdi2
;
35077 gcc_unreachable ();
35080 e1
= gen_reg_rtx (inmode
);
35081 e2
= gen_reg_rtx (inmode
);
35082 res
= gen_reg_rtx (outmode
);
35084 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
35086 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
35088 /* scratch = fxam(op1) */
35089 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
35090 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
35092 /* e1 = fabs(op1) */
35093 emit_insn (gen_abs (e1
, op1
));
35095 /* e2 = e1 + 0.5 */
35096 half
= force_reg (inmode
, half
);
35097 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35098 gen_rtx_PLUS (inmode
, e1
, half
)));
35100 /* res = floor(e2) */
35101 if (inmode
!= XFmode
)
35103 tmp1
= gen_reg_rtx (XFmode
);
35105 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
35106 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
35116 rtx tmp0
= gen_reg_rtx (XFmode
);
35118 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
35120 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35121 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
35122 UNSPEC_TRUNC_NOOP
)));
35126 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
35129 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
35132 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
35135 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
35138 gcc_unreachable ();
35141 /* flags = signbit(a) */
35142 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
35144 /* if (flags) then res = -res */
35145 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
35146 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
35147 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
35149 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35150 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
35151 JUMP_LABEL (insn
) = jump_label
;
35153 emit_insn (gen_neg (res
, res
));
35155 emit_label (jump_label
);
35156 LABEL_NUSES (jump_label
) = 1;
35158 emit_move_insn (op0
, res
);
35161 /* Output code to perform a Newton-Rhapson approximation of a single precision
35162 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
35164 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
35166 rtx x0
, x1
, e0
, e1
;
35168 x0
= gen_reg_rtx (mode
);
35169 e0
= gen_reg_rtx (mode
);
35170 e1
= gen_reg_rtx (mode
);
35171 x1
= gen_reg_rtx (mode
);
35173 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
35175 b
= force_reg (mode
, b
);
35177 /* x0 = rcp(b) estimate */
35178 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35179 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
35182 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35183 gen_rtx_MULT (mode
, x0
, b
)));
35186 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35187 gen_rtx_MULT (mode
, x0
, e0
)));
35190 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35191 gen_rtx_PLUS (mode
, x0
, x0
)));
35194 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
35195 gen_rtx_MINUS (mode
, e1
, e0
)));
35198 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35199 gen_rtx_MULT (mode
, a
, x1
)));
35202 /* Output code to perform a Newton-Rhapson approximation of a
35203 single precision floating point [reciprocal] square root. */
35205 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
35208 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
35211 x0
= gen_reg_rtx (mode
);
35212 e0
= gen_reg_rtx (mode
);
35213 e1
= gen_reg_rtx (mode
);
35214 e2
= gen_reg_rtx (mode
);
35215 e3
= gen_reg_rtx (mode
);
35217 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
35218 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35220 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
35221 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35223 if (VECTOR_MODE_P (mode
))
35225 mthree
= ix86_build_const_vector (mode
, true, mthree
);
35226 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
35229 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
35230 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
35232 a
= force_reg (mode
, a
);
35234 /* x0 = rsqrt(a) estimate */
35235 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35236 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
35239 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
35244 zero
= gen_reg_rtx (mode
);
35245 mask
= gen_reg_rtx (mode
);
35247 zero
= force_reg (mode
, CONST0_RTX(mode
));
35248 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
35249 gen_rtx_NE (mode
, zero
, a
)));
35251 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35252 gen_rtx_AND (mode
, x0
, mask
)));
35256 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35257 gen_rtx_MULT (mode
, x0
, a
)));
35259 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35260 gen_rtx_MULT (mode
, e0
, x0
)));
35263 mthree
= force_reg (mode
, mthree
);
35264 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35265 gen_rtx_PLUS (mode
, e1
, mthree
)));
35267 mhalf
= force_reg (mode
, mhalf
);
35269 /* e3 = -.5 * x0 */
35270 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35271 gen_rtx_MULT (mode
, x0
, mhalf
)));
35273 /* e3 = -.5 * e0 */
35274 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35275 gen_rtx_MULT (mode
, e0
, mhalf
)));
35276 /* ret = e2 * e3 */
35277 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35278 gen_rtx_MULT (mode
, e2
, e3
)));
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.
   NOTE(review): some guard conditions in this function were lost in
   extraction and reconstructed from upstream GCC -- confirm.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
35311 /* Return the mangling of TYPE if it is an extended fundamental type. */
35313 static const char *
35314 ix86_mangle_type (const_tree type
)
35316 type
= TYPE_MAIN_VARIANT (type
);
35318 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
35319 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
35322 switch (TYPE_MODE (type
))
35325 /* __float128 is "g". */
35328 /* "long double" or __float80 is "e". */
35335 /* For 32-bit code we can save PIC register setup by using
35336 __stack_chk_fail_local hidden function instead of calling
35337 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
35338 register, so it is better to call __stack_chk_fail directly. */
35340 static tree ATTRIBUTE_UNUSED
35341 ix86_stack_protect_fail (void)
35343 return TARGET_64BIT
35344 ? default_external_stack_protect_fail ()
35345 : default_hidden_stack_protect_fail ();
35348 /* Select a format to encode pointers in exception handling data. CODE
35349 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
35350 true if the symbol may be affected by dynamic relocations.
35352 ??? All x86 object file formats are capable of representing this.
35353 After all, the relocation needed is the same as for the call insn.
35354 Whether or not a particular assembler allows us to enter such, I
35355 guess we'll have to see. */
35357 asm_preferred_eh_data_format (int code
, int global
)
35361 int type
= DW_EH_PE_sdata8
;
35363 || ix86_cmodel
== CM_SMALL_PIC
35364 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
35365 type
= DW_EH_PE_sdata4
;
35366 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
35368 if (ix86_cmodel
== CM_SMALL
35369 || (ix86_cmodel
== CM_MEDIUM
&& code
))
35370 return DW_EH_PE_udata4
;
35371 return DW_EH_PE_absptr
;
35374 /* Expand copysign from SIGN to the positive value ABS_VALUE
35375 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
35378 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
35380 enum machine_mode mode
= GET_MODE (sign
);
35381 rtx sgn
= gen_reg_rtx (mode
);
35382 if (mask
== NULL_RTX
)
35384 enum machine_mode vmode
;
35386 if (mode
== SFmode
)
35388 else if (mode
== DFmode
)
35393 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
35394 if (!VECTOR_MODE_P (mode
))
35396 /* We need to generate a scalar mode mask in this case. */
35397 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35398 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35399 mask
= gen_reg_rtx (mode
);
35400 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35404 mask
= gen_rtx_NOT (mode
, mask
);
35405 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
35406 gen_rtx_AND (mode
, mask
, sign
)));
35407 emit_insn (gen_rtx_SET (VOIDmode
, result
,
35408 gen_rtx_IOR (mode
, abs_value
, sgn
)));
35411 /* Expand fabs (OP0) and return a new rtx that holds the result. The
35412 mask for masking out the sign-bit is stored in *SMASK, if that is
35415 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
35417 enum machine_mode vmode
, mode
= GET_MODE (op0
);
35420 xa
= gen_reg_rtx (mode
);
35421 if (mode
== SFmode
)
35423 else if (mode
== DFmode
)
35427 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
35428 if (!VECTOR_MODE_P (mode
))
35430 /* We need to generate a scalar mode mask in this case. */
35431 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35432 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35433 mask
= gen_reg_rtx (mode
);
35434 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35436 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
35437 gen_rtx_AND (mode
, op0
, mask
)));
35445 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
35446 swapping the operands if SWAP_OPERANDS is true. The expanded
35447 code is a forward jump to a newly created label in case the
35448 comparison is true. The generated label rtx is returned. */
35450 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
35451 bool swap_operands
)
35462 label
= gen_label_rtx ();
35463 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
35464 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35465 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
35466 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
35467 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
35468 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
35469 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35470 JUMP_LABEL (tmp
) = label
;
35475 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
35476 using comparison code CODE. Operands are swapped for the comparison if
35477 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
35479 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
35480 bool swap_operands
)
35482 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
35483 enum machine_mode mode
= GET_MODE (op0
);
35484 rtx mask
= gen_reg_rtx (mode
);
35493 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
35495 emit_insn (insn (mask
, op0
, op1
,
35496 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
35500 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35501 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
35503 ix86_gen_TWO52 (enum machine_mode mode
)
35505 REAL_VALUE_TYPE TWO52r
;
35508 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
35509 TWO52
= const_double_from_real_value (TWO52r
, mode
);
35510 TWO52
= force_reg (mode
, TWO52
);
35515 /* Expand SSE sequence for computing lround from OP1 storing
35518 ix86_expand_lround (rtx op0
, rtx op1
)
35520 /* C code for the stuff we're doing below:
35521 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35524 enum machine_mode mode
= GET_MODE (op1
);
35525 const struct real_format
*fmt
;
35526 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35529 /* load nextafter (0.5, 0.0) */
35530 fmt
= REAL_MODE_FORMAT (mode
);
35531 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35532 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35534 /* adj = copysign (0.5, op1) */
35535 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
35536 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
35538 /* adj = op1 + adj */
35539 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
35541 /* op0 = (imode)adj */
35542 expand_fix (op0
, adj
, 0);
35545 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
35548 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
35550 /* C code for the stuff we're doing below (for do_floor):
35552 xi -= (double)xi > op1 ? 1 : 0;
35555 enum machine_mode fmode
= GET_MODE (op1
);
35556 enum machine_mode imode
= GET_MODE (op0
);
35557 rtx ireg
, freg
, label
, tmp
;
35559 /* reg = (long)op1 */
35560 ireg
= gen_reg_rtx (imode
);
35561 expand_fix (ireg
, op1
, 0);
35563 /* freg = (double)reg */
35564 freg
= gen_reg_rtx (fmode
);
35565 expand_float (freg
, ireg
, 0);
35567 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35568 label
= ix86_expand_sse_compare_and_jump (UNLE
,
35569 freg
, op1
, !do_floor
);
35570 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
35571 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
35572 emit_move_insn (ireg
, tmp
);
35574 emit_label (label
);
35575 LABEL_NUSES (label
) = 1;
35577 emit_move_insn (op0
, ireg
);
35580 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35581 result in OPERAND0. */
35583 ix86_expand_rint (rtx operand0
, rtx operand1
)
35585 /* C code for the stuff we're doing below:
35586 xa = fabs (operand1);
35587 if (!isless (xa, 2**52))
35589 xa = xa + 2**52 - 2**52;
35590 return copysign (xa, operand1);
35592 enum machine_mode mode
= GET_MODE (operand0
);
35593 rtx res
, xa
, label
, TWO52
, mask
;
35595 res
= gen_reg_rtx (mode
);
35596 emit_move_insn (res
, operand1
);
35598 /* xa = abs (operand1) */
35599 xa
= ix86_expand_sse_fabs (res
, &mask
);
35601 /* if (!isless (xa, TWO52)) goto label; */
35602 TWO52
= ix86_gen_TWO52 (mode
);
35603 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35605 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35606 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
35608 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
35610 emit_label (label
);
35611 LABEL_NUSES (label
) = 1;
35613 emit_move_insn (operand0
, res
);
35616 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35619 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
35621 /* C code for the stuff we expand below.
35622 double xa = fabs (x), x2;
35623 if (!isless (xa, TWO52))
35625 xa = xa + TWO52 - TWO52;
35626 x2 = copysign (xa, x);
35635 enum machine_mode mode
= GET_MODE (operand0
);
35636 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
35638 TWO52
= ix86_gen_TWO52 (mode
);
35640 /* Temporary for holding the result, initialized to the input
35641 operand to ease control flow. */
35642 res
= gen_reg_rtx (mode
);
35643 emit_move_insn (res
, operand1
);
35645 /* xa = abs (operand1) */
35646 xa
= ix86_expand_sse_fabs (res
, &mask
);
35648 /* if (!isless (xa, TWO52)) goto label; */
35649 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35651 /* xa = xa + TWO52 - TWO52; */
35652 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35653 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
35655 /* xa = copysign (xa, operand1) */
35656 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
35658 /* generate 1.0 or -1.0 */
35659 one
= force_reg (mode
,
35660 const_double_from_real_value (do_floor
35661 ? dconst1
: dconstm1
, mode
));
35663 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35664 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
35665 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35666 gen_rtx_AND (mode
, one
, tmp
)));
35667 /* We always need to subtract here to preserve signed zero. */
35668 tmp
= expand_simple_binop (mode
, MINUS
,
35669 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35670 emit_move_insn (res
, tmp
);
35672 emit_label (label
);
35673 LABEL_NUSES (label
) = 1;
35675 emit_move_insn (operand0
, res
);
35678 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35681 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
35683 /* C code for the stuff we expand below.
35684 double xa = fabs (x), x2;
35685 if (!isless (xa, TWO52))
35687 x2 = (double)(long)x;
35694 if (HONOR_SIGNED_ZEROS (mode))
35695 return copysign (x2, x);
35698 enum machine_mode mode
= GET_MODE (operand0
);
35699 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
35701 TWO52
= ix86_gen_TWO52 (mode
);
35703 /* Temporary for holding the result, initialized to the input
35704 operand to ease control flow. */
35705 res
= gen_reg_rtx (mode
);
35706 emit_move_insn (res
, operand1
);
35708 /* xa = abs (operand1) */
35709 xa
= ix86_expand_sse_fabs (res
, &mask
);
35711 /* if (!isless (xa, TWO52)) goto label; */
35712 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35714 /* xa = (double)(long)x */
35715 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
35716 expand_fix (xi
, res
, 0);
35717 expand_float (xa
, xi
, 0);
35720 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
35722 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35723 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
35724 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35725 gen_rtx_AND (mode
, one
, tmp
)));
35726 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
35727 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35728 emit_move_insn (res
, tmp
);
35730 if (HONOR_SIGNED_ZEROS (mode
))
35731 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
35733 emit_label (label
);
35734 LABEL_NUSES (label
) = 1;
35736 emit_move_insn (operand0
, res
);
35739 /* Expand SSE sequence for computing round from OPERAND1 storing
35740 into OPERAND0. Sequence that works without relying on DImode truncation
35741 via cvttsd2siq that is only available on 64bit targets. */
35743 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
35745 /* C code for the stuff we expand below.
35746 double xa = fabs (x), xa2, x2;
35747 if (!isless (xa, TWO52))
35749 Using the absolute value and copying back sign makes
35750 -0.0 -> -0.0 correct.
35751 xa2 = xa + TWO52 - TWO52;
35756 else if (dxa > 0.5)
35758 x2 = copysign (xa2, x);
35761 enum machine_mode mode
= GET_MODE (operand0
);
35762 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
35764 TWO52
= ix86_gen_TWO52 (mode
);
35766 /* Temporary for holding the result, initialized to the input
35767 operand to ease control flow. */
35768 res
= gen_reg_rtx (mode
);
35769 emit_move_insn (res
, operand1
);
35771 /* xa = abs (operand1) */
35772 xa
= ix86_expand_sse_fabs (res
, &mask
);
35774 /* if (!isless (xa, TWO52)) goto label; */
35775 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35777 /* xa2 = xa + TWO52 - TWO52; */
35778 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35779 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
35781 /* dxa = xa2 - xa; */
35782 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
35784 /* generate 0.5, 1.0 and -0.5 */
35785 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
35786 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
35787 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
35791 tmp
= gen_reg_rtx (mode
);
35792 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35793 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
35794 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35795 gen_rtx_AND (mode
, one
, tmp
)));
35796 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35797 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35798 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
35799 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35800 gen_rtx_AND (mode
, one
, tmp
)));
35801 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35803 /* res = copysign (xa2, operand1) */
35804 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
35806 emit_label (label
);
35807 LABEL_NUSES (label
) = 1;
35809 emit_move_insn (operand0
, res
);
35812 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35815 ix86_expand_trunc (rtx operand0
, rtx operand1
)
35817 /* C code for SSE variant we expand below.
35818 double xa = fabs (x), x2;
35819 if (!isless (xa, TWO52))
35821 x2 = (double)(long)x;
35822 if (HONOR_SIGNED_ZEROS (mode))
35823 return copysign (x2, x);
35826 enum machine_mode mode
= GET_MODE (operand0
);
35827 rtx xa
, xi
, TWO52
, label
, res
, mask
;
35829 TWO52
= ix86_gen_TWO52 (mode
);
35831 /* Temporary for holding the result, initialized to the input
35832 operand to ease control flow. */
35833 res
= gen_reg_rtx (mode
);
35834 emit_move_insn (res
, operand1
);
35836 /* xa = abs (operand1) */
35837 xa
= ix86_expand_sse_fabs (res
, &mask
);
35839 /* if (!isless (xa, TWO52)) goto label; */
35840 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35842 /* x = (double)(long)x */
35843 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
35844 expand_fix (xi
, res
, 0);
35845 expand_float (res
, xi
, 0);
35847 if (HONOR_SIGNED_ZEROS (mode
))
35848 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
35850 emit_label (label
);
35851 LABEL_NUSES (label
) = 1;
35853 emit_move_insn (operand0
, res
);
35856 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35859 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
35861 enum machine_mode mode
= GET_MODE (operand0
);
35862 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
35864 /* C code for SSE variant we expand below.
35865 double xa = fabs (x), x2;
35866 if (!isless (xa, TWO52))
35868 xa2 = xa + TWO52 - TWO52;
35872 x2 = copysign (xa2, x);
35876 TWO52
= ix86_gen_TWO52 (mode
);
35878 /* Temporary for holding the result, initialized to the input
35879 operand to ease control flow. */
35880 res
= gen_reg_rtx (mode
);
35881 emit_move_insn (res
, operand1
);
35883 /* xa = abs (operand1) */
35884 xa
= ix86_expand_sse_fabs (res
, &smask
);
35886 /* if (!isless (xa, TWO52)) goto label; */
35887 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35889 /* res = xa + TWO52 - TWO52; */
35890 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35891 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
35892 emit_move_insn (res
, tmp
);
35895 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
35897 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35898 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
35899 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
35900 gen_rtx_AND (mode
, mask
, one
)));
35901 tmp
= expand_simple_binop (mode
, MINUS
,
35902 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
35903 emit_move_insn (res
, tmp
);
35905 /* res = copysign (res, operand1) */
35906 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
35908 emit_label (label
);
35909 LABEL_NUSES (label
) = 1;
35911 emit_move_insn (operand0
, res
);
35914 /* Expand SSE sequence for computing round from OPERAND1 storing
35917 ix86_expand_round (rtx operand0
, rtx operand1
)
35919 /* C code for the stuff we're doing below:
35920 double xa = fabs (x);
35921 if (!isless (xa, TWO52))
35923 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35924 return copysign (xa, x);
35926 enum machine_mode mode
= GET_MODE (operand0
);
35927 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
35928 const struct real_format
*fmt
;
35929 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35931 /* Temporary for holding the result, initialized to the input
35932 operand to ease control flow. */
35933 res
= gen_reg_rtx (mode
);
35934 emit_move_insn (res
, operand1
);
35936 TWO52
= ix86_gen_TWO52 (mode
);
35937 xa
= ix86_expand_sse_fabs (res
, &mask
);
35938 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35940 /* load nextafter (0.5, 0.0) */
35941 fmt
= REAL_MODE_FORMAT (mode
);
35942 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35943 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35945 /* xa = xa + 0.5 */
35946 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
35947 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
35949 /* xa = (double)(int64_t)xa */
35950 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
35951 expand_fix (xi
, xa
, 0);
35952 expand_float (xa
, xi
, 0);
35954 /* res = copysign (xa, operand1) */
35955 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
35957 emit_label (label
);
35958 LABEL_NUSES (label
) = 1;
35960 emit_move_insn (operand0
, res
);
35963 /* Expand SSE sequence for computing round
35964 from OP1 storing into OP0 using sse4 round insn. */
35966 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
35968 enum machine_mode mode
= GET_MODE (op0
);
35969 rtx e1
, e2
, res
, half
;
35970 const struct real_format
*fmt
;
35971 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35972 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
35973 rtx (*gen_round
) (rtx
, rtx
, rtx
);
35978 gen_copysign
= gen_copysignsf3
;
35979 gen_round
= gen_sse4_1_roundsf2
;
35982 gen_copysign
= gen_copysigndf3
;
35983 gen_round
= gen_sse4_1_rounddf2
;
35986 gcc_unreachable ();
35989 /* round (a) = trunc (a + copysign (0.5, a)) */
35991 /* load nextafter (0.5, 0.0) */
35992 fmt
= REAL_MODE_FORMAT (mode
);
35993 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35994 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35995 half
= const_double_from_real_value (pred_half
, mode
);
35997 /* e1 = copysign (0.5, op1) */
35998 e1
= gen_reg_rtx (mode
);
35999 emit_insn (gen_copysign (e1
, half
, op1
));
36001 /* e2 = op1 + e1 */
36002 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
36004 /* res = trunc (e2) */
36005 res
= gen_reg_rtx (mode
);
36006 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
36008 emit_move_insn (op0
, res
);
36012 /* Table of valid machine attributes. */
36013 static const struct attribute_spec ix86_attribute_table
[] =
36015 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
36016 affects_type_identity } */
36017 /* Stdcall attribute says callee is responsible for popping arguments
36018 if they are not variable. */
36019 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36021 /* Fastcall attribute says callee is responsible for popping arguments
36022 if they are not variable. */
36023 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36025 /* Thiscall attribute says callee is responsible for popping arguments
36026 if they are not variable. */
36027 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36029 /* Cdecl attribute says the callee is a normal C declaration */
36030 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36032 /* Regparm attribute specifies how many integer arguments are to be
36033 passed in registers. */
36034 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
36036 /* Sseregparm attribute says we are using x86_64 calling conventions
36037 for FP arguments. */
36038 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36040 /* The transactional memory builtins are implicitly regparm or fastcall
36041 depending on the ABI. Override the generic do-nothing attribute that
36042 these builtins were declared with. */
36043 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
36045 /* force_align_arg_pointer says this function realigns the stack at entry. */
36046 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
36047 false, true, true, ix86_handle_cconv_attribute
, false },
36048 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36049 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
36050 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
36051 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
36054 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
36056 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
36058 #ifdef SUBTARGET_ATTRIBUTE_TABLE
36059 SUBTARGET_ATTRIBUTE_TABLE
,
36061 /* ms_abi and sysv_abi calling convention function attributes. */
36062 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
36063 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
36064 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
36066 { "callee_pop_aggregate_return", 1, 1, false, true, true,
36067 ix86_handle_callee_pop_aggregate_return
, true },
36069 { NULL
, 0, 0, false, false, false, NULL
, false }
36072 /* Implement targetm.vectorize.builtin_vectorization_cost. */
36074 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
36076 int misalign ATTRIBUTE_UNUSED
)
36080 switch (type_of_cost
)
36083 return ix86_cost
->scalar_stmt_cost
;
36086 return ix86_cost
->scalar_load_cost
;
36089 return ix86_cost
->scalar_store_cost
;
36092 return ix86_cost
->vec_stmt_cost
;
36095 return ix86_cost
->vec_align_load_cost
;
36098 return ix86_cost
->vec_store_cost
;
36100 case vec_to_scalar
:
36101 return ix86_cost
->vec_to_scalar_cost
;
36103 case scalar_to_vec
:
36104 return ix86_cost
->scalar_to_vec_cost
;
36106 case unaligned_load
:
36107 case unaligned_store
:
36108 return ix86_cost
->vec_unalign_load_cost
;
36110 case cond_branch_taken
:
36111 return ix86_cost
->cond_taken_branch_cost
;
36113 case cond_branch_not_taken
:
36114 return ix86_cost
->cond_not_taken_branch_cost
;
36117 case vec_promote_demote
:
36118 return ix86_cost
->vec_stmt_cost
;
36120 case vec_construct
:
36121 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
36122 return elements
/ 2 + 1;
36125 gcc_unreachable ();
36129 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
36130 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
36131 insn every time. */
36133 static GTY(()) rtx vselect_insn
;
36135 /* Initialize vselect_insn. */
36138 init_vselect_insn (void)
36143 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
36144 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
36145 XVECEXP (x
, 0, i
) = const0_rtx
;
36146 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
36148 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
36150 vselect_insn
= emit_insn (x
);
36154 /* Construct (set target (vec_select op0 (parallel perm))) and
36155 return true if that's a valid instruction in the active ISA. */
36158 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
36159 unsigned nelt
, bool testing_p
)
36162 rtx x
, save_vconcat
;
36165 if (vselect_insn
== NULL_RTX
)
36166 init_vselect_insn ();
36168 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
36169 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
36170 for (i
= 0; i
< nelt
; ++i
)
36171 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
36172 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
36173 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
36174 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
36175 SET_DEST (PATTERN (vselect_insn
)) = target
;
36176 icode
= recog_memoized (vselect_insn
);
36178 if (icode
>= 0 && !testing_p
)
36179 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
36181 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
36182 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
36183 INSN_CODE (vselect_insn
) = -1;
36188 /* Similar, but generate a vec_concat from op0 and op1 as well. */
36191 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
36192 const unsigned char *perm
, unsigned nelt
,
36195 enum machine_mode v2mode
;
36199 if (vselect_insn
== NULL_RTX
)
36200 init_vselect_insn ();
36202 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
36203 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
36204 PUT_MODE (x
, v2mode
);
36207 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
36208 XEXP (x
, 0) = const0_rtx
;
36209 XEXP (x
, 1) = const0_rtx
;
36213 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36214 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
36217 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
36219 enum machine_mode vmode
= d
->vmode
;
36220 unsigned i
, mask
, nelt
= d
->nelt
;
36221 rtx target
, op0
, op1
, x
;
36222 rtx rperm
[32], vperm
;
36224 if (d
->one_operand_p
)
36226 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
36228 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
36230 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
36235 /* This is a blend, not a permute. Elements must stay in their
36236 respective lanes. */
36237 for (i
= 0; i
< nelt
; ++i
)
36239 unsigned e
= d
->perm
[i
];
36240 if (!(e
== i
|| e
== i
+ nelt
))
36247 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
36248 decision should be extracted elsewhere, so that we only try that
36249 sequence once all budget==3 options have been tried. */
36250 target
= d
->target
;
36263 for (i
= 0; i
< nelt
; ++i
)
36264 mask
|= (d
->perm
[i
] >= nelt
) << i
;
36268 for (i
= 0; i
< 2; ++i
)
36269 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
36274 for (i
= 0; i
< 4; ++i
)
36275 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36280 /* See if bytes move in pairs so we can use pblendw with
36281 an immediate argument, rather than pblendvb with a vector
36283 for (i
= 0; i
< 16; i
+= 2)
36284 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36287 for (i
= 0; i
< nelt
; ++i
)
36288 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
36291 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
36292 vperm
= force_reg (vmode
, vperm
);
36294 if (GET_MODE_SIZE (vmode
) == 16)
36295 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
36297 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
36301 for (i
= 0; i
< 8; ++i
)
36302 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36307 target
= gen_lowpart (vmode
, target
);
36308 op0
= gen_lowpart (vmode
, op0
);
36309 op1
= gen_lowpart (vmode
, op1
);
36313 /* See if bytes move in pairs. If not, vpblendvb must be used. */
36314 for (i
= 0; i
< 32; i
+= 2)
36315 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36317 /* See if bytes move in quadruplets. If yes, vpblendd
36318 with immediate can be used. */
36319 for (i
= 0; i
< 32; i
+= 4)
36320 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
36324 /* See if bytes move the same in both lanes. If yes,
36325 vpblendw with immediate can be used. */
36326 for (i
= 0; i
< 16; i
+= 2)
36327 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
36330 /* Use vpblendw. */
36331 for (i
= 0; i
< 16; ++i
)
36332 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
36337 /* Use vpblendd. */
36338 for (i
= 0; i
< 8; ++i
)
36339 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
36344 /* See if words move in pairs. If yes, vpblendd can be used. */
36345 for (i
= 0; i
< 16; i
+= 2)
36346 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36350 /* See if words move the same in both lanes. If not,
36351 vpblendvb must be used. */
36352 for (i
= 0; i
< 8; i
++)
36353 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
36355 /* Use vpblendvb. */
36356 for (i
= 0; i
< 32; ++i
)
36357 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
36361 target
= gen_lowpart (vmode
, target
);
36362 op0
= gen_lowpart (vmode
, op0
);
36363 op1
= gen_lowpart (vmode
, op1
);
36364 goto finish_pblendvb
;
36367 /* Use vpblendw. */
36368 for (i
= 0; i
< 16; ++i
)
36369 mask
|= (d
->perm
[i
] >= 16) << i
;
36373 /* Use vpblendd. */
36374 for (i
= 0; i
< 8; ++i
)
36375 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36380 /* Use vpblendd. */
36381 for (i
= 0; i
< 4; ++i
)
36382 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36387 gcc_unreachable ();
36390 /* This matches five different patterns with the different modes. */
36391 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
36392 x
= gen_rtx_SET (VOIDmode
, target
, x
);
36398 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36399 in terms of the variable form of vpermilps.
36401 Note that we will have already failed the immediate input vpermilps,
36402 which requires that the high and low part shuffle be identical; the
36403 variable form doesn't require that. */
36406 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
36408 rtx rperm
[8], vperm
;
36411 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
36414 /* We can only permute within the 128-bit lane. */
36415 for (i
= 0; i
< 8; ++i
)
36417 unsigned e
= d
->perm
[i
];
36418 if (i
< 4 ? e
>= 4 : e
< 4)
36425 for (i
= 0; i
< 8; ++i
)
36427 unsigned e
= d
->perm
[i
];
36429 /* Within each 128-bit lane, the elements of op0 are numbered
36430 from 0 and the elements of op1 are numbered from 4. */
36436 rperm
[i
] = GEN_INT (e
);
36439 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
36440 vperm
= force_reg (V8SImode
, vperm
);
36441 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
36446 /* Return true if permutation D can be performed as VMODE permutation
36450 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
36452 unsigned int i
, j
, chunk
;
36454 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
36455 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
36456 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
36459 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
36462 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
36463 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
36464 if (d
->perm
[i
] & (chunk
- 1))
36467 for (j
= 1; j
< chunk
; ++j
)
36468 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
36474 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36475 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
36478 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
36480 unsigned i
, nelt
, eltsz
, mask
;
36481 unsigned char perm
[32];
36482 enum machine_mode vmode
= V16QImode
;
36483 rtx rperm
[32], vperm
, target
, op0
, op1
;
36487 if (!d
->one_operand_p
)
36489 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
36492 && valid_perm_using_mode_p (V2TImode
, d
))
36497 /* Use vperm2i128 insn. The pattern uses
36498 V4DImode instead of V2TImode. */
36499 target
= gen_lowpart (V4DImode
, d
->target
);
36500 op0
= gen_lowpart (V4DImode
, d
->op0
);
36501 op1
= gen_lowpart (V4DImode
, d
->op1
);
36503 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
36504 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
36505 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
36513 if (GET_MODE_SIZE (d
->vmode
) == 16)
36518 else if (GET_MODE_SIZE (d
->vmode
) == 32)
36523 /* V4DImode should be already handled through
36524 expand_vselect by vpermq instruction. */
36525 gcc_assert (d
->vmode
!= V4DImode
);
36528 if (d
->vmode
== V8SImode
36529 || d
->vmode
== V16HImode
36530 || d
->vmode
== V32QImode
)
36532 /* First see if vpermq can be used for
36533 V8SImode/V16HImode/V32QImode. */
36534 if (valid_perm_using_mode_p (V4DImode
, d
))
36536 for (i
= 0; i
< 4; i
++)
36537 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
36540 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
36541 gen_lowpart (V4DImode
, d
->op0
),
36545 /* Next see if vpermd can be used. */
36546 if (valid_perm_using_mode_p (V8SImode
, d
))
36549 /* Or if vpermps can be used. */
36550 else if (d
->vmode
== V8SFmode
)
36553 if (vmode
== V32QImode
)
36555 /* vpshufb only works intra lanes, it is not
36556 possible to shuffle bytes in between the lanes. */
36557 for (i
= 0; i
< nelt
; ++i
)
36558 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
36569 if (vmode
== V8SImode
)
36570 for (i
= 0; i
< 8; ++i
)
36571 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
36574 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
36575 if (!d
->one_operand_p
)
36576 mask
= 2 * nelt
- 1;
36577 else if (vmode
== V16QImode
)
36580 mask
= nelt
/ 2 - 1;
36582 for (i
= 0; i
< nelt
; ++i
)
36584 unsigned j
, e
= d
->perm
[i
] & mask
;
36585 for (j
= 0; j
< eltsz
; ++j
)
36586 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
36590 vperm
= gen_rtx_CONST_VECTOR (vmode
,
36591 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
36592 vperm
= force_reg (vmode
, vperm
);
36594 target
= gen_lowpart (vmode
, d
->target
);
36595 op0
= gen_lowpart (vmode
, d
->op0
);
36596 if (d
->one_operand_p
)
36598 if (vmode
== V16QImode
)
36599 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
36600 else if (vmode
== V32QImode
)
36601 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
36602 else if (vmode
== V8SFmode
)
36603 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
36605 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
36609 op1
= gen_lowpart (vmode
, d
->op1
);
36610 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
36616 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36617 in a single instruction. */
36620 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
36622 unsigned i
, nelt
= d
->nelt
;
36623 unsigned char perm2
[MAX_VECT_LEN
];
36625 /* Check plain VEC_SELECT first, because AVX has instructions that could
36626 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36627 input where SEL+CONCAT may not. */
36628 if (d
->one_operand_p
)
36630 int mask
= nelt
- 1;
36631 bool identity_perm
= true;
36632 bool broadcast_perm
= true;
36634 for (i
= 0; i
< nelt
; i
++)
36636 perm2
[i
] = d
->perm
[i
] & mask
;
36638 identity_perm
= false;
36640 broadcast_perm
= false;
36646 emit_move_insn (d
->target
, d
->op0
);
36649 else if (broadcast_perm
&& TARGET_AVX2
)
36651 /* Use vpbroadcast{b,w,d}. */
36652 rtx (*gen
) (rtx
, rtx
) = NULL
;
36656 gen
= gen_avx2_pbroadcastv32qi_1
;
36659 gen
= gen_avx2_pbroadcastv16hi_1
;
36662 gen
= gen_avx2_pbroadcastv8si_1
;
36665 gen
= gen_avx2_pbroadcastv16qi
;
36668 gen
= gen_avx2_pbroadcastv8hi
;
36671 gen
= gen_avx2_vec_dupv8sf_1
;
36673 /* For other modes prefer other shuffles this function creates. */
36679 emit_insn (gen (d
->target
, d
->op0
));
36684 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
36687 /* There are plenty of patterns in sse.md that are written for
36688 SEL+CONCAT and are not replicated for a single op. Perhaps
36689 that should be changed, to avoid the nastiness here. */
36691 /* Recognize interleave style patterns, which means incrementing
36692 every other permutation operand. */
36693 for (i
= 0; i
< nelt
; i
+= 2)
36695 perm2
[i
] = d
->perm
[i
] & mask
;
36696 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
36698 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
36702 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36705 for (i
= 0; i
< nelt
; i
+= 4)
36707 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
36708 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
36709 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
36710 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
36713 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
36719 /* Finally, try the fully general two operand permute. */
36720 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
36724 /* Recognize interleave style patterns with reversed operands. */
36725 if (!d
->one_operand_p
)
36727 for (i
= 0; i
< nelt
; ++i
)
36729 unsigned e
= d
->perm
[i
];
36737 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
36742 /* Try the SSE4.1 blend variable merge instructions. */
36743 if (expand_vec_perm_blend (d
))
36746 /* Try one of the AVX vpermil variable permutations. */
36747 if (expand_vec_perm_vpermil (d
))
36750 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36751 vpshufb, vpermd, vpermps or vpermq variable permutation. */
36752 if (expand_vec_perm_pshufb (d
))
36758 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36759 in terms of a pair of pshuflw + pshufhw instructions. */
36762 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
36764 unsigned char perm2
[MAX_VECT_LEN
];
36768 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
36771 /* The two permutations only operate in 64-bit lanes. */
36772 for (i
= 0; i
< 4; ++i
)
36773 if (d
->perm
[i
] >= 4)
36775 for (i
= 4; i
< 8; ++i
)
36776 if (d
->perm
[i
] < 4)
36782 /* Emit the pshuflw. */
36783 memcpy (perm2
, d
->perm
, 4);
36784 for (i
= 4; i
< 8; ++i
)
36786 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
36789 /* Emit the pshufhw. */
36790 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
36791 for (i
= 0; i
< 4; ++i
)
36793 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
36799 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36800 the permutation using the SSSE3 palignr instruction. This succeeds
36801 when all of the elements in PERM fit within one vector and we merely
36802 need to shift them down so that a single vector permutation has a
36803 chance to succeed. */
36806 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
36808 unsigned i
, nelt
= d
->nelt
;
36813 /* Even with AVX, palignr only operates on 128-bit vectors. */
36814 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
36817 min
= nelt
, max
= 0;
36818 for (i
= 0; i
< nelt
; ++i
)
36820 unsigned e
= d
->perm
[i
];
36826 if (min
== 0 || max
- min
>= nelt
)
36829 /* Given that we have SSSE3, we know we'll be able to implement the
36830 single operand permutation after the palignr with pshufb. */
36834 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
36835 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
36836 gen_lowpart (TImode
, d
->op1
),
36837 gen_lowpart (TImode
, d
->op0
), shift
));
36839 d
->op0
= d
->op1
= d
->target
;
36840 d
->one_operand_p
= true;
36843 for (i
= 0; i
< nelt
; ++i
)
36845 unsigned e
= d
->perm
[i
] - min
;
36851 /* Test for the degenerate case where the alignment by itself
36852 produces the desired permutation. */
36856 ok
= expand_vec_perm_1 (d
);
36862 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
36864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36865 a two vector permutation into a single vector permutation by using
36866 an interleave operation to merge the vectors. */
36869 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
36871 struct expand_vec_perm_d dremap
, dfinal
;
36872 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
36873 unsigned HOST_WIDE_INT contents
;
36874 unsigned char remap
[2 * MAX_VECT_LEN
];
36876 bool ok
, same_halves
= false;
36878 if (GET_MODE_SIZE (d
->vmode
) == 16)
36880 if (d
->one_operand_p
)
36883 else if (GET_MODE_SIZE (d
->vmode
) == 32)
36887 /* For 32-byte modes allow even d->one_operand_p.
36888 The lack of cross-lane shuffling in some instructions
36889 might prevent a single insn shuffle. */
36891 dfinal
.testing_p
= true;
36892 /* If expand_vec_perm_interleave3 can expand this into
36893 a 3 insn sequence, give up and let it be expanded as
36894 3 insn sequence. While that is one insn longer,
36895 it doesn't need a memory operand and in the common
36896 case that both interleave low and high permutations
36897 with the same operands are adjacent needs 4 insns
36898 for both after CSE. */
36899 if (expand_vec_perm_interleave3 (&dfinal
))
36905 /* Examine from whence the elements come. */
36907 for (i
= 0; i
< nelt
; ++i
)
36908 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
36910 memset (remap
, 0xff, sizeof (remap
));
36913 if (GET_MODE_SIZE (d
->vmode
) == 16)
36915 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
36917 /* Split the two input vectors into 4 halves. */
36918 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
36923 /* If the elements from the low halves use interleave low, and similarly
36924 for interleave high. If the elements are from mis-matched halves, we
36925 can use shufps for V4SF/V4SI or do a DImode shuffle. */
36926 if ((contents
& (h1
| h3
)) == contents
)
36929 for (i
= 0; i
< nelt2
; ++i
)
36932 remap
[i
+ nelt
] = i
* 2 + 1;
36933 dremap
.perm
[i
* 2] = i
;
36934 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
36936 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
36937 dremap
.vmode
= V4SFmode
;
36939 else if ((contents
& (h2
| h4
)) == contents
)
36942 for (i
= 0; i
< nelt2
; ++i
)
36944 remap
[i
+ nelt2
] = i
* 2;
36945 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
36946 dremap
.perm
[i
* 2] = i
+ nelt2
;
36947 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
36949 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
36950 dremap
.vmode
= V4SFmode
;
36952 else if ((contents
& (h1
| h4
)) == contents
)
36955 for (i
= 0; i
< nelt2
; ++i
)
36958 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
36959 dremap
.perm
[i
] = i
;
36960 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
36965 dremap
.vmode
= V2DImode
;
36967 dremap
.perm
[0] = 0;
36968 dremap
.perm
[1] = 3;
36971 else if ((contents
& (h2
| h3
)) == contents
)
36974 for (i
= 0; i
< nelt2
; ++i
)
36976 remap
[i
+ nelt2
] = i
;
36977 remap
[i
+ nelt
] = i
+ nelt2
;
36978 dremap
.perm
[i
] = i
+ nelt2
;
36979 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
36984 dremap
.vmode
= V2DImode
;
36986 dremap
.perm
[0] = 1;
36987 dremap
.perm
[1] = 2;
36995 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
36996 unsigned HOST_WIDE_INT q
[8];
36997 unsigned int nonzero_halves
[4];
36999 /* Split the two input vectors into 8 quarters. */
37000 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
37001 for (i
= 1; i
< 8; ++i
)
37002 q
[i
] = q
[0] << (nelt4
* i
);
37003 for (i
= 0; i
< 4; ++i
)
37004 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
37006 nonzero_halves
[nzcnt
] = i
;
37012 gcc_assert (d
->one_operand_p
);
37013 nonzero_halves
[1] = nonzero_halves
[0];
37014 same_halves
= true;
37016 else if (d
->one_operand_p
)
37018 gcc_assert (nonzero_halves
[0] == 0);
37019 gcc_assert (nonzero_halves
[1] == 1);
37024 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
37026 /* Attempt to increase the likelihood that dfinal
37027 shuffle will be intra-lane. */
37028 char tmph
= nonzero_halves
[0];
37029 nonzero_halves
[0] = nonzero_halves
[1];
37030 nonzero_halves
[1] = tmph
;
37033 /* vperm2f128 or vperm2i128. */
37034 for (i
= 0; i
< nelt2
; ++i
)
37036 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
37037 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
37038 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
37039 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
37042 if (d
->vmode
!= V8SFmode
37043 && d
->vmode
!= V4DFmode
37044 && d
->vmode
!= V8SImode
)
37046 dremap
.vmode
= V8SImode
;
37048 for (i
= 0; i
< 4; ++i
)
37050 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
37051 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
37055 else if (d
->one_operand_p
)
37057 else if (TARGET_AVX2
37058 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
37061 for (i
= 0; i
< nelt4
; ++i
)
37064 remap
[i
+ nelt
] = i
* 2 + 1;
37065 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
37066 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
37067 dremap
.perm
[i
* 2] = i
;
37068 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
37069 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
37070 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
37073 else if (TARGET_AVX2
37074 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
37077 for (i
= 0; i
< nelt4
; ++i
)
37079 remap
[i
+ nelt4
] = i
* 2;
37080 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
37081 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
37082 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
37083 dremap
.perm
[i
* 2] = i
+ nelt4
;
37084 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
37085 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
37086 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
37093 /* Use the remapping array set up above to move the elements from their
37094 swizzled locations into their final destinations. */
37096 for (i
= 0; i
< nelt
; ++i
)
37098 unsigned e
= remap
[d
->perm
[i
]];
37099 gcc_assert (e
< nelt
);
37100 /* If same_halves is true, both halves of the remapped vector are the
37101 same. Avoid cross-lane accesses if possible. */
37102 if (same_halves
&& i
>= nelt2
)
37104 gcc_assert (e
< nelt2
);
37105 dfinal
.perm
[i
] = e
+ nelt2
;
37108 dfinal
.perm
[i
] = e
;
37110 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
37111 dfinal
.op1
= dfinal
.op0
;
37112 dfinal
.one_operand_p
= true;
37113 dremap
.target
= dfinal
.op0
;
37115 /* Test if the final remap can be done with a single insn. For V4SFmode or
37116 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
37118 ok
= expand_vec_perm_1 (&dfinal
);
37119 seq
= get_insns ();
37128 if (dremap
.vmode
!= dfinal
.vmode
)
37130 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
37131 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
37132 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
37135 ok
= expand_vec_perm_1 (&dremap
);
37142 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37143 a single vector cross-lane permutation into vpermq followed
37144 by any of the single insn permutations. */
37147 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
37149 struct expand_vec_perm_d dremap
, dfinal
;
37150 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
37151 unsigned contents
[2];
37155 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
37156 && d
->one_operand_p
))
37161 for (i
= 0; i
< nelt2
; ++i
)
37163 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
37164 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
37167 for (i
= 0; i
< 2; ++i
)
37169 unsigned int cnt
= 0;
37170 for (j
= 0; j
< 4; ++j
)
37171 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
37179 dremap
.vmode
= V4DImode
;
37181 dremap
.target
= gen_reg_rtx (V4DImode
);
37182 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
37183 dremap
.op1
= dremap
.op0
;
37184 dremap
.one_operand_p
= true;
37185 for (i
= 0; i
< 2; ++i
)
37187 unsigned int cnt
= 0;
37188 for (j
= 0; j
< 4; ++j
)
37189 if ((contents
[i
] & (1u << j
)) != 0)
37190 dremap
.perm
[2 * i
+ cnt
++] = j
;
37191 for (; cnt
< 2; ++cnt
)
37192 dremap
.perm
[2 * i
+ cnt
] = 0;
37196 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
37197 dfinal
.op1
= dfinal
.op0
;
37198 dfinal
.one_operand_p
= true;
37199 for (i
= 0, j
= 0; i
< nelt
; ++i
)
37203 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
37204 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
37206 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
37207 dfinal
.perm
[i
] |= nelt4
;
37209 gcc_unreachable ();
37212 ok
= expand_vec_perm_1 (&dremap
);
37215 ok
= expand_vec_perm_1 (&dfinal
);
37221 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
37222 a vector permutation using two instructions, vperm2f128 resp.
37223 vperm2i128 followed by any single in-lane permutation. */
37226 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
37228 struct expand_vec_perm_d dfirst
, dsecond
;
37229 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
37233 || GET_MODE_SIZE (d
->vmode
) != 32
37234 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
37238 dsecond
.one_operand_p
= false;
37239 dsecond
.testing_p
= true;
37241 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
37242 immediate. For perm < 16 the second permutation uses
37243 d->op0 as first operand, for perm >= 16 it uses d->op1
37244 as first operand. The second operand is the result of
37246 for (perm
= 0; perm
< 32; perm
++)
37248 /* Ignore permutations which do not move anything cross-lane. */
37251 /* The second shuffle for e.g. V4DFmode has
37252 0123 and ABCD operands.
37253 Ignore AB23, as 23 is already in the second lane
37254 of the first operand. */
37255 if ((perm
& 0xc) == (1 << 2)) continue;
37256 /* And 01CD, as 01 is in the first lane of the first
37258 if ((perm
& 3) == 0) continue;
37259 /* And 4567, as then the vperm2[fi]128 doesn't change
37260 anything on the original 4567 second operand. */
37261 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
37265 /* The second shuffle for e.g. V4DFmode has
37266 4567 and ABCD operands.
37267 Ignore AB67, as 67 is already in the second lane
37268 of the first operand. */
37269 if ((perm
& 0xc) == (3 << 2)) continue;
37270 /* And 45CD, as 45 is in the first lane of the first
37272 if ((perm
& 3) == 2) continue;
37273 /* And 0123, as then the vperm2[fi]128 doesn't change
37274 anything on the original 0123 first operand. */
37275 if ((perm
& 0xf) == (1 << 2)) continue;
37278 for (i
= 0; i
< nelt
; i
++)
37280 j
= d
->perm
[i
] / nelt2
;
37281 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
37282 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
37283 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
37284 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
37292 ok
= expand_vec_perm_1 (&dsecond
);
37303 /* Found a usable second shuffle. dfirst will be
37304 vperm2f128 on d->op0 and d->op1. */
37305 dsecond
.testing_p
= false;
37307 dfirst
.target
= gen_reg_rtx (d
->vmode
);
37308 for (i
= 0; i
< nelt
; i
++)
37309 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
37310 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
37312 ok
= expand_vec_perm_1 (&dfirst
);
37315 /* And dsecond is some single insn shuffle, taking
37316 d->op0 and result of vperm2f128 (if perm < 16) or
37317 d->op1 and result of vperm2f128 (otherwise). */
37318 dsecond
.op1
= dfirst
.target
;
37320 dsecond
.op0
= dfirst
.op1
;
37322 ok
= expand_vec_perm_1 (&dsecond
);
37328 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
37329 if (d
->one_operand_p
)
37336 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37337 a two vector permutation using 2 intra-lane interleave insns
37338 and cross-lane shuffle for 32-byte vectors. */
37341 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
37344 rtx (*gen
) (rtx
, rtx
, rtx
);
37346 if (d
->one_operand_p
)
37348 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
37350 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
37356 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
37358 for (i
= 0; i
< nelt
; i
+= 2)
37359 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
37360 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
37370 gen
= gen_vec_interleave_highv32qi
;
37372 gen
= gen_vec_interleave_lowv32qi
;
37376 gen
= gen_vec_interleave_highv16hi
;
37378 gen
= gen_vec_interleave_lowv16hi
;
37382 gen
= gen_vec_interleave_highv8si
;
37384 gen
= gen_vec_interleave_lowv8si
;
37388 gen
= gen_vec_interleave_highv4di
;
37390 gen
= gen_vec_interleave_lowv4di
;
37394 gen
= gen_vec_interleave_highv8sf
;
37396 gen
= gen_vec_interleave_lowv8sf
;
37400 gen
= gen_vec_interleave_highv4df
;
37402 gen
= gen_vec_interleave_lowv4df
;
37405 gcc_unreachable ();
37408 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
37412 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
37413 a single vector permutation using a single intra-lane vector
37414 permutation, vperm2f128 swapping the lanes and vblend* insn blending
37415 the non-swapped and swapped vectors together. */
37418 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
37420 struct expand_vec_perm_d dfirst
, dsecond
;
37421 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
37424 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
37428 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
37429 || !d
->one_operand_p
)
37433 for (i
= 0; i
< nelt
; i
++)
37434 dfirst
.perm
[i
] = 0xff;
37435 for (i
= 0, msk
= 0; i
< nelt
; i
++)
37437 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
37438 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
37440 dfirst
.perm
[j
] = d
->perm
[i
];
37444 for (i
= 0; i
< nelt
; i
++)
37445 if (dfirst
.perm
[i
] == 0xff)
37446 dfirst
.perm
[i
] = i
;
37449 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
37452 ok
= expand_vec_perm_1 (&dfirst
);
37453 seq
= get_insns ();
37465 dsecond
.op0
= dfirst
.target
;
37466 dsecond
.op1
= dfirst
.target
;
37467 dsecond
.one_operand_p
= true;
37468 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
37469 for (i
= 0; i
< nelt
; i
++)
37470 dsecond
.perm
[i
] = i
^ nelt2
;
37472 ok
= expand_vec_perm_1 (&dsecond
);
37475 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
37476 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
37480 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
37481 permutation using two vperm2f128, followed by a vshufpd insn blending
37482 the two vectors together. */
37485 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
37487 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
37490 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
37500 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
37501 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
37502 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
37503 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
37504 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
37505 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
37506 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
37507 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
37508 dthird
.perm
[0] = (d
->perm
[0] % 2);
37509 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
37510 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
37511 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
37513 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
37514 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
37515 dthird
.op0
= dfirst
.target
;
37516 dthird
.op1
= dsecond
.target
;
37517 dthird
.one_operand_p
= false;
37519 canonicalize_perm (&dfirst
);
37520 canonicalize_perm (&dsecond
);
37522 ok
= expand_vec_perm_1 (&dfirst
)
37523 && expand_vec_perm_1 (&dsecond
)
37524 && expand_vec_perm_1 (&dthird
);
37531 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
37532 permutation with two pshufb insns and an ior. We should have already
37533 failed all two instruction sequences. */
37536 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
37538 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
37539 unsigned int i
, nelt
, eltsz
;
37541 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
37543 gcc_assert (!d
->one_operand_p
);
37546 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37548 /* Generate two permutation masks. If the required element is within
37549 the given vector it is shuffled into the proper lane. If the required
37550 element is in the other vector, force a zero into the lane by setting
37551 bit 7 in the permutation mask. */
37552 m128
= GEN_INT (-128);
37553 for (i
= 0; i
< nelt
; ++i
)
37555 unsigned j
, e
= d
->perm
[i
];
37556 unsigned which
= (e
>= nelt
);
37560 for (j
= 0; j
< eltsz
; ++j
)
37562 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
37563 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
37567 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
37568 vperm
= force_reg (V16QImode
, vperm
);
37570 l
= gen_reg_rtx (V16QImode
);
37571 op
= gen_lowpart (V16QImode
, d
->op0
);
37572 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
37574 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
37575 vperm
= force_reg (V16QImode
, vperm
);
37577 h
= gen_reg_rtx (V16QImode
);
37578 op
= gen_lowpart (V16QImode
, d
->op1
);
37579 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
37581 op
= gen_lowpart (V16QImode
, d
->target
);
37582 emit_insn (gen_iorv16qi3 (op
, l
, h
));
37587 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
37588 with two vpshufb insns, vpermq and vpor. We should have already failed
37589 all two or three instruction sequences. */
37592 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
37594 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
37595 unsigned int i
, nelt
, eltsz
;
37598 || !d
->one_operand_p
37599 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37606 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37608 /* Generate two permutation masks. If the required element is within
37609 the same lane, it is shuffled in. If the required element from the
37610 other lane, force a zero by setting bit 7 in the permutation mask.
37611 In the other mask the mask has non-negative elements if element
37612 is requested from the other lane, but also moved to the other lane,
37613 so that the result of vpshufb can have the two V2TImode halves
37615 m128
= GEN_INT (-128);
37616 for (i
= 0; i
< nelt
; ++i
)
37618 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
37619 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
37621 for (j
= 0; j
< eltsz
; ++j
)
37623 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
37624 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
37628 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
37629 vperm
= force_reg (V32QImode
, vperm
);
37631 h
= gen_reg_rtx (V32QImode
);
37632 op
= gen_lowpart (V32QImode
, d
->op0
);
37633 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
37635 /* Swap the 128-byte lanes of h into hp. */
37636 hp
= gen_reg_rtx (V4DImode
);
37637 op
= gen_lowpart (V4DImode
, h
);
37638 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
37641 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
37642 vperm
= force_reg (V32QImode
, vperm
);
37644 l
= gen_reg_rtx (V32QImode
);
37645 op
= gen_lowpart (V32QImode
, d
->op0
);
37646 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
37648 op
= gen_lowpart (V32QImode
, d
->target
);
37649 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
37654 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
37655 and extract-odd permutations of two V32QImode and V16QImode operand
37656 with two vpshufb insns, vpor and vpermq. We should have already
37657 failed all two or three instruction sequences. */
37660 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
37662 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
37663 unsigned int i
, nelt
, eltsz
;
37666 || d
->one_operand_p
37667 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37670 for (i
= 0; i
< d
->nelt
; ++i
)
37671 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
37678 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37680 /* Generate two permutation masks. In the first permutation mask
37681 the first quarter will contain indexes for the first half
37682 of the op0, the second quarter will contain bit 7 set, third quarter
37683 will contain indexes for the second half of the op0 and the
37684 last quarter bit 7 set. In the second permutation mask
37685 the first quarter will contain bit 7 set, the second quarter
37686 indexes for the first half of the op1, the third quarter bit 7 set
37687 and last quarter indexes for the second half of the op1.
37688 I.e. the first mask e.g. for V32QImode extract even will be:
37689 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
37690 (all values masked with 0xf except for -128) and second mask
37691 for extract even will be
37692 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
37693 m128
= GEN_INT (-128);
37694 for (i
= 0; i
< nelt
; ++i
)
37696 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
37697 unsigned which
= d
->perm
[i
] >= nelt
;
37698 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
37700 for (j
= 0; j
< eltsz
; ++j
)
37702 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
37703 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
37707 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
37708 vperm
= force_reg (V32QImode
, vperm
);
37710 l
= gen_reg_rtx (V32QImode
);
37711 op
= gen_lowpart (V32QImode
, d
->op0
);
37712 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
37714 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
37715 vperm
= force_reg (V32QImode
, vperm
);
37717 h
= gen_reg_rtx (V32QImode
);
37718 op
= gen_lowpart (V32QImode
, d
->op1
);
37719 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
37721 ior
= gen_reg_rtx (V32QImode
);
37722 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
37724 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
37725 op
= gen_lowpart (V4DImode
, d
->target
);
37726 ior
= gen_lowpart (V4DImode
, ior
);
37727 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
37728 const1_rtx
, GEN_INT (3)));
37733 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
37734 and extract-odd permutations. */
37737 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
37744 t1
= gen_reg_rtx (V4DFmode
);
37745 t2
= gen_reg_rtx (V4DFmode
);
37747 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37748 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
37749 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
37751 /* Now an unpck[lh]pd will produce the result required. */
37753 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
37755 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
37761 int mask
= odd
? 0xdd : 0x88;
37763 t1
= gen_reg_rtx (V8SFmode
);
37764 t2
= gen_reg_rtx (V8SFmode
);
37765 t3
= gen_reg_rtx (V8SFmode
);
37767 /* Shuffle within the 128-bit lanes to produce:
37768 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
37769 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
37772 /* Shuffle the lanes around to produce:
37773 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
37774 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
37777 /* Shuffle within the 128-bit lanes to produce:
37778 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
37779 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
37781 /* Shuffle within the 128-bit lanes to produce:
37782 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
37783 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
37785 /* Shuffle the lanes around to produce:
37786 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
37787 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
37796 /* These are always directly implementable by expand_vec_perm_1. */
37797 gcc_unreachable ();
37801 return expand_vec_perm_pshufb2 (d
);
37804 /* We need 2*log2(N)-1 operations to achieve odd/even
37805 with interleave. */
37806 t1
= gen_reg_rtx (V8HImode
);
37807 t2
= gen_reg_rtx (V8HImode
);
37808 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
37809 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
37810 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
37811 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
37813 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
37815 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
37822 return expand_vec_perm_pshufb2 (d
);
37825 t1
= gen_reg_rtx (V16QImode
);
37826 t2
= gen_reg_rtx (V16QImode
);
37827 t3
= gen_reg_rtx (V16QImode
);
37828 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
37829 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
37830 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
37831 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
37832 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
37833 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
37835 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
37837 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
37844 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
37849 struct expand_vec_perm_d d_copy
= *d
;
37850 d_copy
.vmode
= V4DFmode
;
37851 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
37852 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
37853 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
37854 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
37857 t1
= gen_reg_rtx (V4DImode
);
37858 t2
= gen_reg_rtx (V4DImode
);
37860 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37861 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
37862 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
37864 /* Now an vpunpck[lh]qdq will produce the result required. */
37866 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
37868 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
37875 struct expand_vec_perm_d d_copy
= *d
;
37876 d_copy
.vmode
= V8SFmode
;
37877 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
37878 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
37879 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
37880 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
37883 t1
= gen_reg_rtx (V8SImode
);
37884 t2
= gen_reg_rtx (V8SImode
);
37886 /* Shuffle the lanes around into
37887 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37888 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
37889 gen_lowpart (V4DImode
, d
->op0
),
37890 gen_lowpart (V4DImode
, d
->op1
),
37892 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
37893 gen_lowpart (V4DImode
, d
->op0
),
37894 gen_lowpart (V4DImode
, d
->op1
),
37897 /* Swap the 2nd and 3rd position in each lane into
37898 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37899 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
37900 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37901 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
37902 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37904 /* Now an vpunpck[lh]qdq will produce
37905 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37907 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
37908 gen_lowpart (V4DImode
, t1
),
37909 gen_lowpart (V4DImode
, t2
));
37911 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
37912 gen_lowpart (V4DImode
, t1
),
37913 gen_lowpart (V4DImode
, t2
));
37918 gcc_unreachable ();
37924 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37925 extract-even and extract-odd permutations. */
37928 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
37930 unsigned i
, odd
, nelt
= d
->nelt
;
37933 if (odd
!= 0 && odd
!= 1)
37936 for (i
= 1; i
< nelt
; ++i
)
37937 if (d
->perm
[i
] != 2 * i
+ odd
)
37940 return expand_vec_perm_even_odd_1 (d
, odd
);
37943 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37944 permutations. We assume that expand_vec_perm_1 has already failed. */
37947 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
37949 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
37950 enum machine_mode vmode
= d
->vmode
;
37951 unsigned char perm2
[4];
37959 /* These are special-cased in sse.md so that we can optionally
37960 use the vbroadcast instruction. They expand to two insns
37961 if the input happens to be in a register. */
37962 gcc_unreachable ();
37968 /* These are always implementable using standard shuffle patterns. */
37969 gcc_unreachable ();
37973 /* These can be implemented via interleave. We save one insn by
37974 stopping once we have promoted to V4SImode and then use pshufd. */
37978 rtx (*gen
) (rtx
, rtx
, rtx
)
37979 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
37980 : gen_vec_interleave_lowv8hi
;
37984 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
37985 : gen_vec_interleave_highv8hi
;
37990 dest
= gen_reg_rtx (vmode
);
37991 emit_insn (gen (dest
, op0
, op0
));
37992 vmode
= get_mode_wider_vector (vmode
);
37993 op0
= gen_lowpart (vmode
, dest
);
37995 while (vmode
!= V4SImode
);
37997 memset (perm2
, elt
, 4);
37998 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
38007 /* For AVX2 broadcasts of the first element vpbroadcast* or
38008 vpermq should be used by expand_vec_perm_1. */
38009 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
38013 gcc_unreachable ();
38017 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
38018 broadcast permutations. */
38021 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
38023 unsigned i
, elt
, nelt
= d
->nelt
;
38025 if (!d
->one_operand_p
)
38029 for (i
= 1; i
< nelt
; ++i
)
38030 if (d
->perm
[i
] != elt
)
38033 return expand_vec_perm_broadcast_1 (d
);
38036 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
38037 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
38038 all the shorter instruction sequences. */
38041 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
38043 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
38044 unsigned int i
, nelt
, eltsz
;
38048 || d
->one_operand_p
38049 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38056 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38058 /* Generate 4 permutation masks. If the required element is within
38059 the same lane, it is shuffled in. If the required element from the
38060 other lane, force a zero by setting bit 7 in the permutation mask.
38061 In the other mask the mask has non-negative elements if element
38062 is requested from the other lane, but also moved to the other lane,
38063 so that the result of vpshufb can have the two V2TImode halves
38065 m128
= GEN_INT (-128);
38066 for (i
= 0; i
< 32; ++i
)
38068 rperm
[0][i
] = m128
;
38069 rperm
[1][i
] = m128
;
38070 rperm
[2][i
] = m128
;
38071 rperm
[3][i
] = m128
;
38077 for (i
= 0; i
< nelt
; ++i
)
38079 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38080 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
38081 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
38083 for (j
= 0; j
< eltsz
; ++j
)
38084 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
38085 used
[which
] = true;
38088 for (i
= 0; i
< 2; ++i
)
38090 if (!used
[2 * i
+ 1])
38095 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
38096 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
38097 vperm
= force_reg (V32QImode
, vperm
);
38098 h
[i
] = gen_reg_rtx (V32QImode
);
38099 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
38100 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
38103 /* Swap the 128-byte lanes of h[X]. */
38104 for (i
= 0; i
< 2; ++i
)
38106 if (h
[i
] == NULL_RTX
)
38108 op
= gen_reg_rtx (V4DImode
);
38109 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
38110 const2_rtx
, GEN_INT (3), const0_rtx
,
38112 h
[i
] = gen_lowpart (V32QImode
, op
);
38115 for (i
= 0; i
< 2; ++i
)
38122 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
38123 vperm
= force_reg (V32QImode
, vperm
);
38124 l
[i
] = gen_reg_rtx (V32QImode
);
38125 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
38126 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
38129 for (i
= 0; i
< 2; ++i
)
38133 op
= gen_reg_rtx (V32QImode
);
38134 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
38141 gcc_assert (l
[0] && l
[1]);
38142 op
= gen_lowpart (V32QImode
, d
->target
);
38143 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
38147 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
38148 With all of the interface bits taken care of, perform the expansion
38149 in D and return true on success. */
38152 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
38154 /* Try a single instruction expansion. */
38155 if (expand_vec_perm_1 (d
))
38158 /* Try sequences of two instructions. */
38160 if (expand_vec_perm_pshuflw_pshufhw (d
))
38163 if (expand_vec_perm_palignr (d
))
38166 if (expand_vec_perm_interleave2 (d
))
38169 if (expand_vec_perm_broadcast (d
))
38172 if (expand_vec_perm_vpermq_perm_1 (d
))
38175 if (expand_vec_perm_vperm2f128 (d
))
38178 /* Try sequences of three instructions. */
38180 if (expand_vec_perm_2vperm2f128_vshuf (d
))
38183 if (expand_vec_perm_pshufb2 (d
))
38186 if (expand_vec_perm_interleave3 (d
))
38189 if (expand_vec_perm_vperm2f128_vblend (d
))
38192 /* Try sequences of four instructions. */
38194 if (expand_vec_perm_vpshufb2_vpermq (d
))
38197 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
38200 /* ??? Look for narrow permutations whose element orderings would
38201 allow the promotion to a wider mode. */
38203 /* ??? Look for sequences of interleave or a wider permute that place
38204 the data into the correct lanes for a half-vector shuffle like
38205 pshuf[lh]w or vpermilps. */
38207 /* ??? Look for sequences of interleave that produce the desired results.
38208 The combinatorics of punpck[lh] get pretty ugly... */
38210 if (expand_vec_perm_even_odd (d
))
38213 /* Even longer sequences. */
38214 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
38220 /* If a permutation only uses one operand, make it clear. Returns true
38221 if the permutation references both operands. */
38224 canonicalize_perm (struct expand_vec_perm_d
*d
)
38226 int i
, which
, nelt
= d
->nelt
;
38228 for (i
= which
= 0; i
< nelt
; ++i
)
38229 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
38231 d
->one_operand_p
= true;
38238 if (!rtx_equal_p (d
->op0
, d
->op1
))
38240 d
->one_operand_p
= false;
38243 /* The elements of PERM do not suggest that only the first operand
38244 is used, but both operands are identical. Allow easier matching
38245 of the permutation by folding the permutation into the single
38250 for (i
= 0; i
< nelt
; ++i
)
38251 d
->perm
[i
] &= nelt
- 1;
38260 return (which
== 3);
38264 ix86_expand_vec_perm_const (rtx operands
[4])
38266 struct expand_vec_perm_d d
;
38267 unsigned char perm
[MAX_VECT_LEN
];
38272 d
.target
= operands
[0];
38273 d
.op0
= operands
[1];
38274 d
.op1
= operands
[2];
38277 d
.vmode
= GET_MODE (d
.target
);
38278 gcc_assert (VECTOR_MODE_P (d
.vmode
));
38279 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38280 d
.testing_p
= false;
38282 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
38283 gcc_assert (XVECLEN (sel
, 0) == nelt
);
38284 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
38286 for (i
= 0; i
< nelt
; ++i
)
38288 rtx e
= XVECEXP (sel
, 0, i
);
38289 int ei
= INTVAL (e
) & (2 * nelt
- 1);
38294 two_args
= canonicalize_perm (&d
);
38296 if (ix86_expand_vec_perm_const_1 (&d
))
38299 /* If the selector says both arguments are needed, but the operands are the
38300 same, the above tried to expand with one_operand_p and flattened selector.
38301 If that didn't work, retry without one_operand_p; we succeeded with that
38303 if (two_args
&& d
.one_operand_p
)
38305 d
.one_operand_p
= false;
38306 memcpy (d
.perm
, perm
, sizeof (perm
));
38307 return ix86_expand_vec_perm_const_1 (&d
);
38313 /* Implement targetm.vectorize.vec_perm_const_ok. */
38316 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
38317 const unsigned char *sel
)
38319 struct expand_vec_perm_d d
;
38320 unsigned int i
, nelt
, which
;
38324 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38325 d
.testing_p
= true;
38327 /* Given sufficient ISA support we can just return true here
38328 for selected vector modes. */
38329 if (GET_MODE_SIZE (d
.vmode
) == 16)
38331 /* All implementable with a single vpperm insn. */
38334 /* All implementable with 2 pshufb + 1 ior. */
38337 /* All implementable with shufpd or unpck[lh]pd. */
38342 /* Extract the values from the vector CST into the permutation
38344 memcpy (d
.perm
, sel
, nelt
);
38345 for (i
= which
= 0; i
< nelt
; ++i
)
38347 unsigned char e
= d
.perm
[i
];
38348 gcc_assert (e
< 2 * nelt
);
38349 which
|= (e
< nelt
? 1 : 2);
38352 /* For all elements from second vector, fold the elements to first. */
38354 for (i
= 0; i
< nelt
; ++i
)
38357 /* Check whether the mask can be applied to the vector type. */
38358 d
.one_operand_p
= (which
!= 3);
38360 /* Implementable with shufps or pshufd. */
38361 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
38364 /* Otherwise we have to go through the motions and see if we can
38365 figure out how to generate the requested permutation. */
38366 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
38367 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
38368 if (!d
.one_operand_p
)
38369 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
38372 ret
= ix86_expand_vec_perm_const_1 (&d
);
38379 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
38381 struct expand_vec_perm_d d
;
38387 d
.vmode
= GET_MODE (targ
);
38388 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38389 d
.one_operand_p
= false;
38390 d
.testing_p
= false;
38392 for (i
= 0; i
< nelt
; ++i
)
38393 d
.perm
[i
] = i
* 2 + odd
;
38395 /* We'll either be able to implement the permutation directly... */
38396 if (expand_vec_perm_1 (&d
))
38399 /* ... or we use the special-case patterns. */
38400 expand_vec_perm_even_odd_1 (&d
, odd
);
38403 /* Expand an insert into a vector register through pinsr insn.
38404 Return true if successful. */
38407 ix86_expand_pinsr (rtx
*operands
)
38409 rtx dst
= operands
[0];
38410 rtx src
= operands
[3];
38412 unsigned int size
= INTVAL (operands
[1]);
38413 unsigned int pos
= INTVAL (operands
[2]);
38415 if (GET_CODE (dst
) == SUBREG
)
38417 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
38418 dst
= SUBREG_REG (dst
);
38421 if (GET_CODE (src
) == SUBREG
)
38422 src
= SUBREG_REG (src
);
38424 switch (GET_MODE (dst
))
38431 enum machine_mode srcmode
, dstmode
;
38432 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
38434 srcmode
= mode_for_size (size
, MODE_INT
, 0);
38439 if (!TARGET_SSE4_1
)
38441 dstmode
= V16QImode
;
38442 pinsr
= gen_sse4_1_pinsrb
;
38448 dstmode
= V8HImode
;
38449 pinsr
= gen_sse2_pinsrw
;
38453 if (!TARGET_SSE4_1
)
38455 dstmode
= V4SImode
;
38456 pinsr
= gen_sse4_1_pinsrd
;
38460 gcc_assert (TARGET_64BIT
);
38461 if (!TARGET_SSE4_1
)
38463 dstmode
= V2DImode
;
38464 pinsr
= gen_sse4_1_pinsrq
;
38471 dst
= gen_lowpart (dstmode
, dst
);
38472 src
= gen_lowpart (srcmode
, src
);
38476 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
38485 /* This function returns the calling abi specific va_list type node.
38486 It returns the FNDECL specific va_list type. */
38489 ix86_fn_abi_va_list (tree fndecl
)
38492 return va_list_type_node
;
38493 gcc_assert (fndecl
!= NULL_TREE
);
38495 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
38496 return ms_va_list_type_node
;
38498 return sysv_va_list_type_node
;
38501 /* Returns the canonical va_list type specified by TYPE. If there
38502 is no valid TYPE provided, it return NULL_TREE. */
38505 ix86_canonical_va_list_type (tree type
)
38509 /* Resolve references and pointers to va_list type. */
38510 if (TREE_CODE (type
) == MEM_REF
)
38511 type
= TREE_TYPE (type
);
38512 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
38513 type
= TREE_TYPE (type
);
38514 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
38515 type
= TREE_TYPE (type
);
38517 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
38519 wtype
= va_list_type_node
;
38520 gcc_assert (wtype
!= NULL_TREE
);
38522 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
38524 /* If va_list is an array type, the argument may have decayed
38525 to a pointer type, e.g. by being passed to another function.
38526 In that case, unwrap both types so that we can compare the
38527 underlying records. */
38528 if (TREE_CODE (htype
) == ARRAY_TYPE
38529 || POINTER_TYPE_P (htype
))
38531 wtype
= TREE_TYPE (wtype
);
38532 htype
= TREE_TYPE (htype
);
38535 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
38536 return va_list_type_node
;
38537 wtype
= sysv_va_list_type_node
;
38538 gcc_assert (wtype
!= NULL_TREE
);
38540 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
38542 /* If va_list is an array type, the argument may have decayed
38543 to a pointer type, e.g. by being passed to another function.
38544 In that case, unwrap both types so that we can compare the
38545 underlying records. */
38546 if (TREE_CODE (htype
) == ARRAY_TYPE
38547 || POINTER_TYPE_P (htype
))
38549 wtype
= TREE_TYPE (wtype
);
38550 htype
= TREE_TYPE (htype
);
38553 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
38554 return sysv_va_list_type_node
;
38555 wtype
= ms_va_list_type_node
;
38556 gcc_assert (wtype
!= NULL_TREE
);
38558 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
38560 /* If va_list is an array type, the argument may have decayed
38561 to a pointer type, e.g. by being passed to another function.
38562 In that case, unwrap both types so that we can compare the
38563 underlying records. */
38564 if (TREE_CODE (htype
) == ARRAY_TYPE
38565 || POINTER_TYPE_P (htype
))
38567 wtype
= TREE_TYPE (wtype
);
38568 htype
= TREE_TYPE (htype
);
38571 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
38572 return ms_va_list_type_node
;
38575 return std_canonical_va_list_type (type
);
38578 /* Iterate through the target-specific builtin types for va_list.
38579 IDX denotes the iterator, *PTREE is set to the result type of
38580 the va_list builtin, and *PNAME to its internal type.
38581 Returns zero if there is no element for this index, otherwise
38582 IDX should be increased upon the next call.
38583 Note, do not iterate a base builtin's name like __builtin_va_list.
38584 Used from c_common_nodes_and_builtins. */
38587 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
38597 *ptree
= ms_va_list_type_node
;
38598 *pname
= "__builtin_ms_va_list";
38602 *ptree
= sysv_va_list_type_node
;
38603 *pname
= "__builtin_sysv_va_list";
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

/* Large sentinel count used where a group imposes no real limit.  */
#undef BIG
#define BIG 100
38652 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
38653 enum dispatch_group
{
38668 /* Number of allowable groups in a dispatch window. It is an array
38669 indexed by dispatch_group enum. 100 is used as a big number,
38670 because the number of these kind of operations does not have any
38671 effect in dispatch window, but we need them for other reasons in
38673 static unsigned int num_allowable_groups
[disp_last
] = {
38674 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
38677 char group_name
[disp_last
+ 1][16] = {
38678 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
38679 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
38680 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro op..  */
  last_path
};
38692 /* sched_insn_info defines a window to the instructions scheduled in
38693 the basic block. It contains a pointer to the insn_info table and
38694 the instruction scheduled.
38696 Windows are allocated for each basic block and are linked
38698 typedef struct sched_insn_info_s
{
38700 enum dispatch_group group
;
38701 enum insn_path path
;
38706 /* Linked list of dispatch windows. This is a two way list of
38707 dispatch windows of a basic block. It contains information about
38708 the number of uops in the window and the total number of
38709 instructions and of bytes in the object code for this dispatch
38711 typedef struct dispatch_windows_s
{
38712 int num_insn
; /* Number of insn in the window. */
38713 int num_uops
; /* Number of uops in the window. */
38714 int window_size
; /* Number of bytes in the window. */
38715 int window_num
; /* Window number between 0 or 1. */
38716 int num_imm
; /* Number of immediates in an insn. */
38717 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
38718 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
38719 int imm_size
; /* Total immediates in the window. */
38720 int num_loads
; /* Total memory loads in the window. */
38721 int num_stores
; /* Total memory stores in the window. */
38722 int violation
; /* Violation exists in window. */
38723 sched_insn_info
*window
; /* Pointer to the window. */
38724 struct dispatch_windows_s
*next
;
38725 struct dispatch_windows_s
*prev
;
38726 } dispatch_windows
;
38728 /* Immediate valuse used in an insn. */
38729 typedef struct imm_info_s
38736 static dispatch_windows
*dispatch_window_list
;
38737 static dispatch_windows
*dispatch_window_list1
;
38739 /* Get dispatch group of insn. */
38741 static enum dispatch_group
38742 get_mem_group (rtx insn
)
38744 enum attr_memory memory
;
38746 if (INSN_CODE (insn
) < 0)
38747 return disp_no_group
;
38748 memory
= get_attr_memory (insn
);
38749 if (memory
== MEMORY_STORE
)
38752 if (memory
== MEMORY_LOAD
)
38755 if (memory
== MEMORY_BOTH
)
38756 return disp_load_store
;
38758 return disp_no_group
;
38761 /* Return true if insn is a compare instruction. */
38766 enum attr_type type
;
38768 type
= get_attr_type (insn
);
38769 return (type
== TYPE_TEST
38770 || type
== TYPE_ICMP
38771 || type
== TYPE_FCMP
38772 || GET_CODE (PATTERN (insn
)) == COMPARE
);
38775 /* Return true if a dispatch violation encountered. */
38778 dispatch_violation (void)
38780 if (dispatch_window_list
->next
)
38781 return dispatch_window_list
->next
->violation
;
38782 return dispatch_window_list
->violation
;
38785 /* Return true if insn is a branch instruction. */
38788 is_branch (rtx insn
)
38790 return (CALL_P (insn
) || JUMP_P (insn
));
38793 /* Return true if insn is a prefetch instruction. */
38796 is_prefetch (rtx insn
)
38798 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
38801 /* This function initializes a dispatch window and the list container holding a
38802 pointer to the window. */
38805 init_window (int window_num
)
38808 dispatch_windows
*new_list
;
38810 if (window_num
== 0)
38811 new_list
= dispatch_window_list
;
38813 new_list
= dispatch_window_list1
;
38815 new_list
->num_insn
= 0;
38816 new_list
->num_uops
= 0;
38817 new_list
->window_size
= 0;
38818 new_list
->next
= NULL
;
38819 new_list
->prev
= NULL
;
38820 new_list
->window_num
= window_num
;
38821 new_list
->num_imm
= 0;
38822 new_list
->num_imm_32
= 0;
38823 new_list
->num_imm_64
= 0;
38824 new_list
->imm_size
= 0;
38825 new_list
->num_loads
= 0;
38826 new_list
->num_stores
= 0;
38827 new_list
->violation
= false;
38829 for (i
= 0; i
< MAX_INSN
; i
++)
38831 new_list
->window
[i
].insn
= NULL
;
38832 new_list
->window
[i
].group
= disp_no_group
;
38833 new_list
->window
[i
].path
= no_path
;
38834 new_list
->window
[i
].byte_len
= 0;
38835 new_list
->window
[i
].imm_bytes
= 0;
38840 /* This function allocates and initializes a dispatch window and the
38841 list container holding a pointer to the window. */
38843 static dispatch_windows
*
38844 allocate_window (void)
38846 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
38847 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
38852 /* This routine initializes the dispatch scheduling information. It
38853 initiates building dispatch scheduler tables and constructs the
38854 first dispatch window. */
38857 init_dispatch_sched (void)
38859 /* Allocate a dispatch list and a window. */
38860 dispatch_window_list
= allocate_window ();
38861 dispatch_window_list1
= allocate_window ();
38866 /* This function returns true if a branch is detected. End of a basic block
38867 does not have to be a branch, but here we assume only branches end a
38871 is_end_basic_block (enum dispatch_group group
)
38873 return group
== disp_branch
;
38876 /* This function is called when the end of a window processing is reached. */
38879 process_end_window (void)
38881 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
38882 if (dispatch_window_list
->next
)
38884 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
38885 gcc_assert (dispatch_window_list
->window_size
38886 + dispatch_window_list1
->window_size
<= 48);
38892 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
38893 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
38894 for 48 bytes of instructions. Note that these windows are not dispatch
38895 windows that their sizes are DISPATCH_WINDOW_SIZE. */
38897 static dispatch_windows
*
38898 allocate_next_window (int window_num
)
38900 if (window_num
== 0)
38902 if (dispatch_window_list
->next
)
38905 return dispatch_window_list
;
38908 dispatch_window_list
->next
= dispatch_window_list1
;
38909 dispatch_window_list1
->prev
= dispatch_window_list
;
38911 return dispatch_window_list1
;
38914 /* Increment the number of immediate operands of an instruction. */
38917 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
38922 switch ( GET_CODE (*in_rtx
))
38927 (imm_values
->imm
)++;
38928 if (x86_64_immediate_operand (*in_rtx
, SImode
))
38929 (imm_values
->imm32
)++;
38931 (imm_values
->imm64
)++;
38935 (imm_values
->imm
)++;
38936 (imm_values
->imm64
)++;
38940 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
38942 (imm_values
->imm
)++;
38943 (imm_values
->imm32
)++;
38954 /* Compute number of immediate operands of an instruction. */
38957 find_constant (rtx in_rtx
, imm_info
*imm_values
)
38959 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
38960 (rtx_function
) find_constant_1
, (void *) imm_values
);
38963 /* Return total size of immediate operands of an instruction along with number
38964 of corresponding immediate-operands. It initializes its parameters to zero
38965 befor calling FIND_CONSTANT.
38966 INSN is the input instruction. IMM is the total of immediates.
38967 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
38971 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
38973 imm_info imm_values
= {0, 0, 0};
38975 find_constant (insn
, &imm_values
);
38976 *imm
= imm_values
.imm
;
38977 *imm32
= imm_values
.imm32
;
38978 *imm64
= imm_values
.imm64
;
38979 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
38982 /* This function indicates if an operand of an instruction is an
38986 has_immediate (rtx insn
)
38988 int num_imm_operand
;
38989 int num_imm32_operand
;
38990 int num_imm64_operand
;
38993 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
38994 &num_imm64_operand
);
38998 /* Return single or double path for instructions. */
39000 static enum insn_path
39001 get_insn_path (rtx insn
)
39003 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
39005 if ((int)path
== 0)
39006 return path_single
;
39008 if ((int)path
== 1)
39009 return path_double
;
39014 /* Return insn dispatch group. */
39016 static enum dispatch_group
39017 get_insn_group (rtx insn
)
39019 enum dispatch_group group
= get_mem_group (insn
);
39023 if (is_branch (insn
))
39024 return disp_branch
;
39029 if (has_immediate (insn
))
39032 if (is_prefetch (insn
))
39033 return disp_prefetch
;
39035 return disp_no_group
;
39038 /* Count number of GROUP restricted instructions in a dispatch
39039 window WINDOW_LIST. */
39042 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
39044 enum dispatch_group group
= get_insn_group (insn
);
39046 int num_imm_operand
;
39047 int num_imm32_operand
;
39048 int num_imm64_operand
;
39050 if (group
== disp_no_group
)
39053 if (group
== disp_imm
)
39055 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
39056 &num_imm64_operand
);
39057 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
39058 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
39059 || (num_imm32_operand
> 0
39060 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
39061 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
39062 || (num_imm64_operand
> 0
39063 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
39064 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
39065 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
39066 && num_imm64_operand
> 0
39067 && ((window_list
->num_imm_64
> 0
39068 && window_list
->num_insn
>= 2)
39069 || window_list
->num_insn
>= 3)))
39075 if ((group
== disp_load_store
39076 && (window_list
->num_loads
>= MAX_LOAD
39077 || window_list
->num_stores
>= MAX_STORE
))
39078 || ((group
== disp_load
39079 || group
== disp_prefetch
)
39080 && window_list
->num_loads
>= MAX_LOAD
)
39081 || (group
== disp_store
39082 && window_list
->num_stores
>= MAX_STORE
))
39088 /* This function returns true if insn satisfies dispatch rules on the
39089 last window scheduled. */
39092 fits_dispatch_window (rtx insn
)
39094 dispatch_windows
*window_list
= dispatch_window_list
;
39095 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
39096 unsigned int num_restrict
;
39097 enum dispatch_group group
= get_insn_group (insn
);
39098 enum insn_path path
= get_insn_path (insn
);
39101 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
39102 instructions should be given the lowest priority in the
39103 scheduling process in Haifa scheduler to make sure they will be
39104 scheduled in the same dispatch window as the reference to them. */
39105 if (group
== disp_jcc
|| group
== disp_cmp
)
39108 /* Check nonrestricted. */
39109 if (group
== disp_no_group
|| group
== disp_branch
)
39112 /* Get last dispatch window. */
39113 if (window_list_next
)
39114 window_list
= window_list_next
;
39116 if (window_list
->window_num
== 1)
39118 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
39121 || (min_insn_size (insn
) + sum
) >= 48)
39122 /* Window 1 is full. Go for next window. */
39126 num_restrict
= count_num_restricted (insn
, window_list
);
39128 if (num_restrict
> num_allowable_groups
[group
])
39131 /* See if it fits in the first window. */
39132 if (window_list
->window_num
== 0)
39134 /* The first widow should have only single and double path
39136 if (path
== path_double
39137 && (window_list
->num_uops
+ 2) > MAX_INSN
)
39139 else if (path
!= path_single
)
39145 /* Add an instruction INSN with NUM_UOPS micro-operations to the
39146 dispatch window WINDOW_LIST. */
39149 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
39151 int byte_len
= min_insn_size (insn
);
39152 int num_insn
= window_list
->num_insn
;
39154 sched_insn_info
*window
= window_list
->window
;
39155 enum dispatch_group group
= get_insn_group (insn
);
39156 enum insn_path path
= get_insn_path (insn
);
39157 int num_imm_operand
;
39158 int num_imm32_operand
;
39159 int num_imm64_operand
;
39161 if (!window_list
->violation
&& group
!= disp_cmp
39162 && !fits_dispatch_window (insn
))
39163 window_list
->violation
= true;
39165 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
39166 &num_imm64_operand
);
39168 /* Initialize window with new instruction. */
39169 window
[num_insn
].insn
= insn
;
39170 window
[num_insn
].byte_len
= byte_len
;
39171 window
[num_insn
].group
= group
;
39172 window
[num_insn
].path
= path
;
39173 window
[num_insn
].imm_bytes
= imm_size
;
39175 window_list
->window_size
+= byte_len
;
39176 window_list
->num_insn
= num_insn
+ 1;
39177 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
39178 window_list
->imm_size
+= imm_size
;
39179 window_list
->num_imm
+= num_imm_operand
;
39180 window_list
->num_imm_32
+= num_imm32_operand
;
39181 window_list
->num_imm_64
+= num_imm64_operand
;
39183 if (group
== disp_store
)
39184 window_list
->num_stores
+= 1;
39185 else if (group
== disp_load
39186 || group
== disp_prefetch
)
39187 window_list
->num_loads
+= 1;
39188 else if (group
== disp_load_store
)
39190 window_list
->num_stores
+= 1;
39191 window_list
->num_loads
+= 1;
39195 /* Adds a scheduled instruction, INSN, to the current dispatch window.
39196 If the total bytes of instructions or the number of instructions in
39197 the window exceed allowable, it allocates a new window. */
39200 add_to_dispatch_window (rtx insn
)
39203 dispatch_windows
*window_list
;
39204 dispatch_windows
*next_list
;
39205 dispatch_windows
*window0_list
;
39206 enum insn_path path
;
39207 enum dispatch_group insn_group
;
39215 if (INSN_CODE (insn
) < 0)
39218 byte_len
= min_insn_size (insn
);
39219 window_list
= dispatch_window_list
;
39220 next_list
= window_list
->next
;
39221 path
= get_insn_path (insn
);
39222 insn_group
= get_insn_group (insn
);
39224 /* Get the last dispatch window. */
39226 window_list
= dispatch_window_list
->next
;
39228 if (path
== path_single
)
39230 else if (path
== path_double
)
39233 insn_num_uops
= (int) path
;
39235 /* If current window is full, get a new window.
39236 Window number zero is full, if MAX_INSN uops are scheduled in it.
39237 Window number one is full, if window zero's bytes plus window
39238 one's bytes is 32, or if the bytes of the new instruction added
39239 to the total makes it greater than 48, or it has already MAX_INSN
39240 instructions in it. */
39241 num_insn
= window_list
->num_insn
;
39242 num_uops
= window_list
->num_uops
;
39243 window_num
= window_list
->window_num
;
39244 insn_fits
= fits_dispatch_window (insn
);
39246 if (num_insn
>= MAX_INSN
39247 || num_uops
+ insn_num_uops
> MAX_INSN
39250 window_num
= ~window_num
& 1;
39251 window_list
= allocate_next_window (window_num
);
39254 if (window_num
== 0)
39256 add_insn_window (insn
, window_list
, insn_num_uops
);
39257 if (window_list
->num_insn
>= MAX_INSN
39258 && insn_group
== disp_branch
)
39260 process_end_window ();
39264 else if (window_num
== 1)
39266 window0_list
= window_list
->prev
;
39267 sum
= window0_list
->window_size
+ window_list
->window_size
;
39269 || (byte_len
+ sum
) >= 48)
39271 process_end_window ();
39272 window_list
= dispatch_window_list
;
39275 add_insn_window (insn
, window_list
, insn_num_uops
);
39278 gcc_unreachable ();
39280 if (is_end_basic_block (insn_group
))
39282 /* End of basic block is reached do end-basic-block process. */
39283 process_end_window ();
39288 /* Print the dispatch window, WINDOW_NUM, to FILE. */
39290 DEBUG_FUNCTION
static void
39291 debug_dispatch_window_file (FILE *file
, int window_num
)
39293 dispatch_windows
*list
;
39296 if (window_num
== 0)
39297 list
= dispatch_window_list
;
39299 list
= dispatch_window_list1
;
39301 fprintf (file
, "Window #%d:\n", list
->window_num
);
39302 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
39303 list
->num_insn
, list
->num_uops
, list
->window_size
);
39304 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
39305 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
39307 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
39309 fprintf (file
, " insn info:\n");
39311 for (i
= 0; i
< MAX_INSN
; i
++)
39313 if (!list
->window
[i
].insn
)
39315 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
39316 i
, group_name
[list
->window
[i
].group
],
39317 i
, (void *)list
->window
[i
].insn
,
39318 i
, list
->window
[i
].path
,
39319 i
, list
->window
[i
].byte_len
,
39320 i
, list
->window
[i
].imm_bytes
);
39324 /* Print to stdout a dispatch window. */
39326 DEBUG_FUNCTION
void
39327 debug_dispatch_window (int window_num
)
39329 debug_dispatch_window_file (stdout
, window_num
);
39332 /* Print INSN dispatch information to FILE. */
39334 DEBUG_FUNCTION
static void
39335 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
39338 enum insn_path path
;
39339 enum dispatch_group group
;
39341 int num_imm_operand
;
39342 int num_imm32_operand
;
39343 int num_imm64_operand
;
39345 if (INSN_CODE (insn
) < 0)
39348 byte_len
= min_insn_size (insn
);
39349 path
= get_insn_path (insn
);
39350 group
= get_insn_group (insn
);
39351 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
39352 &num_imm64_operand
);
39354 fprintf (file
, " insn info:\n");
39355 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
39356 group_name
[group
], path
, byte_len
);
39357 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
39358 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
39361 /* Print to STDERR the status of the ready list with respect to
39362 dispatch windows. */
39364 DEBUG_FUNCTION
void
39365 debug_ready_dispatch (void)
39368 int no_ready
= number_in_ready ();
39370 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
39372 for (i
= 0; i
< no_ready
; i
++)
39373 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
39376 /* This routine is the driver of the dispatch scheduler. */
39379 do_dispatch (rtx insn
, int mode
)
39381 if (mode
== DISPATCH_INIT
)
39382 init_dispatch_sched ();
39383 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
39384 add_to_dispatch_window (insn
);
39387 /* Return TRUE if Dispatch Scheduling is supported. */
39390 has_dispatch (rtx insn
, int action
)
39392 if ((TARGET_BDVER1
|| TARGET_BDVER2
)
39393 && flag_dispatch_scheduler
)
39399 case IS_DISPATCH_ON
:
39404 return is_cmp (insn
);
39406 case DISPATCH_VIOLATION
:
39407 return dispatch_violation ();
39409 case FITS_DISPATCH_WINDOW
:
39410 return fits_dispatch_window (insn
);
39416 /* Implementation of reassociation_width target hook used by
39417 reassoc phase to identify parallelism level in reassociated
39418 tree. Statements tree_code is passed in OPC. Arguments type
39421 Currently parallel reassociation is enabled for Atom
39422 processors only and we set reassociation width to be 2
39423 because Atom may issue up to 2 instructions per cycle.
39425 Return value should be fixed if parallel reassociation is
39426 enabled for other processors. */
39429 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
39430 enum machine_mode mode
)
39434 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
39436 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
39442 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
39443 place emms and femms instructions. */
39445 static enum machine_mode
39446 ix86_preferred_simd_mode (enum machine_mode mode
)
39454 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
39456 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
39458 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
39460 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
39463 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
39469 if (!TARGET_VECTORIZE_DOUBLE
)
39471 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
39473 else if (TARGET_SSE2
)
39482 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
39485 static unsigned int
39486 ix86_autovectorize_vector_sizes (void)
39488 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
39491 /* Validate target specific memory model bits in VAL. */
39493 static unsigned HOST_WIDE_INT
39494 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
39496 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
39497 unsigned HOST_WIDE_INT strong
;
39499 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
39501 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
39503 warning (OPT_Winvalid_memory_model
,
39504 "Unknown architecture specific memory model");
39505 return MEMMODEL_SEQ_CST
;
39507 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
39508 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
39510 warning (OPT_Winvalid_memory_model
,
39511 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
39512 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
39514 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
39516 warning (OPT_Winvalid_memory_model
,
39517 "HLE_RELEASE not used with RELEASE or stronger memory model");
39518 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
39523 /* Initialize the GCC target structure. */
39524 #undef TARGET_RETURN_IN_MEMORY
39525 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
39527 #undef TARGET_LEGITIMIZE_ADDRESS
39528 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
39530 #undef TARGET_ATTRIBUTE_TABLE
39531 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
39532 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39533 # undef TARGET_MERGE_DECL_ATTRIBUTES
39534 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
39537 #undef TARGET_COMP_TYPE_ATTRIBUTES
39538 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
39540 #undef TARGET_INIT_BUILTINS
39541 #define TARGET_INIT_BUILTINS ix86_init_builtins
39542 #undef TARGET_BUILTIN_DECL
39543 #define TARGET_BUILTIN_DECL ix86_builtin_decl
39544 #undef TARGET_EXPAND_BUILTIN
39545 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
39547 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
39548 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
39549 ix86_builtin_vectorized_function
39551 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
39552 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
39554 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
39555 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
39557 #undef TARGET_VECTORIZE_BUILTIN_GATHER
39558 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
39560 #undef TARGET_BUILTIN_RECIPROCAL
39561 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
39563 #undef TARGET_ASM_FUNCTION_EPILOGUE
39564 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
39566 #undef TARGET_ENCODE_SECTION_INFO
39567 #ifndef SUBTARGET_ENCODE_SECTION_INFO
39568 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
39570 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
39573 #undef TARGET_ASM_OPEN_PAREN
39574 #define TARGET_ASM_OPEN_PAREN ""
39575 #undef TARGET_ASM_CLOSE_PAREN
39576 #define TARGET_ASM_CLOSE_PAREN ""
39578 #undef TARGET_ASM_BYTE_OP
39579 #define TARGET_ASM_BYTE_OP ASM_BYTE
39581 #undef TARGET_ASM_ALIGNED_HI_OP
39582 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
39583 #undef TARGET_ASM_ALIGNED_SI_OP
39584 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
39586 #undef TARGET_ASM_ALIGNED_DI_OP
39587 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
39590 #undef TARGET_PROFILE_BEFORE_PROLOGUE
39591 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
39593 #undef TARGET_ASM_UNALIGNED_HI_OP
39594 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
39595 #undef TARGET_ASM_UNALIGNED_SI_OP
39596 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
39597 #undef TARGET_ASM_UNALIGNED_DI_OP
39598 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
39600 #undef TARGET_PRINT_OPERAND
39601 #define TARGET_PRINT_OPERAND ix86_print_operand
39602 #undef TARGET_PRINT_OPERAND_ADDRESS
39603 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
39604 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
39605 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
39606 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
39607 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
39609 #undef TARGET_SCHED_INIT_GLOBAL
39610 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
39611 #undef TARGET_SCHED_ADJUST_COST
39612 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
39613 #undef TARGET_SCHED_ISSUE_RATE
39614 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
39615 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
39616 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
39617 ia32_multipass_dfa_lookahead
39619 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
39620 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
39622 #undef TARGET_MEMMODEL_CHECK
39623 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
39626 #undef TARGET_HAVE_TLS
39627 #define TARGET_HAVE_TLS true
39629 #undef TARGET_CANNOT_FORCE_CONST_MEM
39630 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
39631 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
39632 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
39634 #undef TARGET_DELEGITIMIZE_ADDRESS
39635 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
39637 #undef TARGET_MS_BITFIELD_LAYOUT_P
39638 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
39641 #undef TARGET_BINDS_LOCAL_P
39642 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
39644 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39645 #undef TARGET_BINDS_LOCAL_P
39646 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
39649 #undef TARGET_ASM_OUTPUT_MI_THUNK
39650 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
39651 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
39652 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
39654 #undef TARGET_ASM_FILE_START
39655 #define TARGET_ASM_FILE_START x86_file_start
39657 #undef TARGET_OPTION_OVERRIDE
39658 #define TARGET_OPTION_OVERRIDE ix86_option_override
39660 #undef TARGET_REGISTER_MOVE_COST
39661 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
39662 #undef TARGET_MEMORY_MOVE_COST
39663 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
39664 #undef TARGET_RTX_COSTS
39665 #define TARGET_RTX_COSTS ix86_rtx_costs
39666 #undef TARGET_ADDRESS_COST
39667 #define TARGET_ADDRESS_COST ix86_address_cost
39669 #undef TARGET_FIXED_CONDITION_CODE_REGS
39670 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
39671 #undef TARGET_CC_MODES_COMPATIBLE
39672 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
39674 #undef TARGET_MACHINE_DEPENDENT_REORG
39675 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
39677 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
39678 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
39680 #undef TARGET_BUILD_BUILTIN_VA_LIST
39681 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
39683 #undef TARGET_FOLD_BUILTIN
39684 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
39686 #undef TARGET_ENUM_VA_LIST_P
39687 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
39689 #undef TARGET_FN_ABI_VA_LIST
39690 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
39692 #undef TARGET_CANONICAL_VA_LIST_TYPE
39693 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
39695 #undef TARGET_EXPAND_BUILTIN_VA_START
39696 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
39698 #undef TARGET_MD_ASM_CLOBBERS
39699 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
39701 #undef TARGET_PROMOTE_PROTOTYPES
39702 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
39703 #undef TARGET_STRUCT_VALUE_RTX
39704 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
39705 #undef TARGET_SETUP_INCOMING_VARARGS
39706 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
39707 #undef TARGET_MUST_PASS_IN_STACK
39708 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
39709 #undef TARGET_FUNCTION_ARG_ADVANCE
39710 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
39711 #undef TARGET_FUNCTION_ARG
39712 #define TARGET_FUNCTION_ARG ix86_function_arg
39713 #undef TARGET_FUNCTION_ARG_BOUNDARY
39714 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
39715 #undef TARGET_PASS_BY_REFERENCE
39716 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
39717 #undef TARGET_INTERNAL_ARG_POINTER
39718 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
39719 #undef TARGET_UPDATE_STACK_BOUNDARY
39720 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
39721 #undef TARGET_GET_DRAP_RTX
39722 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
39723 #undef TARGET_STRICT_ARGUMENT_NAMING
39724 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
39725 #undef TARGET_STATIC_CHAIN
39726 #define TARGET_STATIC_CHAIN ix86_static_chain
39727 #undef TARGET_TRAMPOLINE_INIT
39728 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
39729 #undef TARGET_RETURN_POPS_ARGS
39730 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
39732 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
39733 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
39735 #undef TARGET_SCALAR_MODE_SUPPORTED_P
39736 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
39738 #undef TARGET_VECTOR_MODE_SUPPORTED_P
39739 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
39741 #undef TARGET_C_MODE_FOR_SUFFIX
39742 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
39745 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
39746 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
39749 #ifdef SUBTARGET_INSERT_ATTRIBUTES
39750 #undef TARGET_INSERT_ATTRIBUTES
39751 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
39754 #undef TARGET_MANGLE_TYPE
39755 #define TARGET_MANGLE_TYPE ix86_mangle_type
39758 #undef TARGET_STACK_PROTECT_FAIL
39759 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
39762 #undef TARGET_FUNCTION_VALUE
39763 #define TARGET_FUNCTION_VALUE ix86_function_value
39765 #undef TARGET_FUNCTION_VALUE_REGNO_P
39766 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
39768 #undef TARGET_PROMOTE_FUNCTION_MODE
39769 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
39771 #undef TARGET_SECONDARY_RELOAD
39772 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
39774 #undef TARGET_CLASS_MAX_NREGS
39775 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
39777 #undef TARGET_PREFERRED_RELOAD_CLASS
39778 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
39779 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
39780 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
39781 #undef TARGET_CLASS_LIKELY_SPILLED_P
39782 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
39784 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
39785 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
39786 ix86_builtin_vectorization_cost
39787 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
39788 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
39789 ix86_vectorize_vec_perm_const_ok
39790 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
39791 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
39792 ix86_preferred_simd_mode
39793 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
39794 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
39795 ix86_autovectorize_vector_sizes
39797 #undef TARGET_SET_CURRENT_FUNCTION
39798 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
39800 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
39801 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
39803 #undef TARGET_OPTION_SAVE
39804 #define TARGET_OPTION_SAVE ix86_function_specific_save
39806 #undef TARGET_OPTION_RESTORE
39807 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
39809 #undef TARGET_OPTION_PRINT
39810 #define TARGET_OPTION_PRINT ix86_function_specific_print
39812 #undef TARGET_CAN_INLINE_P
39813 #define TARGET_CAN_INLINE_P ix86_can_inline_p
39815 #undef TARGET_EXPAND_TO_RTL_HOOK
39816 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
39818 #undef TARGET_LEGITIMATE_ADDRESS_P
39819 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
39821 #undef TARGET_LEGITIMATE_CONSTANT_P
39822 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
39824 #undef TARGET_FRAME_POINTER_REQUIRED
39825 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
39827 #undef TARGET_CAN_ELIMINATE
39828 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
39830 #undef TARGET_EXTRA_LIVE_ON_ENTRY
39831 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
39833 #undef TARGET_ASM_CODE_END
39834 #define TARGET_ASM_CODE_END ix86_code_end
39836 #undef TARGET_CONDITIONAL_REGISTER_USAGE
39837 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
39840 #undef TARGET_INIT_LIBFUNCS
39841 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
39844 struct gcc_target targetm
= TARGET_INITIALIZER
;
39846 #include "gt-i386.h"