/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
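
/* Illustrative note: BLOCK_INFO relies on alloc_aux_for_blocks
   (called from move_or_delete_vzeroupper below) sizing each bb->aux
   as a struct block_info_def, so per-block state is accessed as e.g.

     BLOCK_INFO (bb)->state = used;
     if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
       ...

   The macro is just a cast of the generic aux pointer.  */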

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
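
/* Usage sketch: the vzeroupper UNSPEC pattern carries one of these
   values as its first operand, matching the extraction done below in
   move_or_delete_vzeroupper_2:

     avx256 = INTVAL (XVECEXP (pat, 0, 0));
     if (avx256 == callee_return_avx256)
       ... the upper 128bits are live again after the call ...

   which lets the pass tell calls that return or pass 256bit AVX
   values apart from plain vzeroupper uses.  */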

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
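
/* Usage sketch: check_avx256_stores is a note_stores callback.  The
   call site in move_or_delete_vzeroupper_2 below is simply

     note_stores (pat, check_avx256_stores, &state);

   after which STATE has flipped to `used' iff the pattern references
   a 256bit AVX register, either as a store destination or as the
   source of a SET.  */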

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
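
/* The predecessor walk above is a data-flow meet: `used' dominates,
   `unused' is the identity, and `unknown' survives only while
   UNKNOWN_IS_UNUSED is false.  E.g. predecessors in states
   {unused, used} yield `used', while {unused, unknown} yield
   `unknown' during the iteration and `unused' in the final pass.  */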

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
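
/* Descriptive note on the driver above: it is a two-worklist data-flow
   iteration.  WORKLIST holds the current round and PENDING the next,
   both keyed by reverse completion order so predecessors tend to be
   visited before successors.  A new round starts only if some block's
   exit state changed to `used' (rescan_vzeroupper_p); the final
   FOR_EACH_BB pass with unknown_is_unused = true then treats any
   remaining `unknown' predecessor state as `unused'.  */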

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
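
/* Usage sketch (assuming the processor_costs field names used by the
   rtx-cost code elsewhere in this file):

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];
     cost = ix86_cost->divide[MODE_INDEX (mode)];

   Any mode other than QI/HI/SI/DI maps to index 4, the "other" row
   of the tables below.  */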

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
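
/* Worked example: with COSTS_N_INSNS (N) == (N) * 4, a one-insn add
   costs 4 units, and COSTS_N_BYTES (2) == 4 makes a 2-byte add cost
   the same; a 3-byte instruction, COSTS_N_BYTES (3) == 6, is thus
   charged 1.5 "adds" when tuning for size.  */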

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
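
/* Layout note for the memcpy/memset entries in the tables below: each
   is a stringop_algs, i.e. an algorithm for unknown sizes plus
   {max_size, alg} pairs for known sizes, with -1 meaning "anything
   larger".  For example

     {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
      DUMMY_STRINGOP_ALGS}

   reads: inline rep movsl up to 256 bytes, use a library call beyond
   that; DUMMY_STRINGOP_ALGS ("always libcall") fills the second,
   64bit slot where no 64bit tuning is provided.  */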

static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2), /* cost of an add instruction */
  COSTS_N_BYTES (3), /* cost of a lea instruction */
  COSTS_N_BYTES (2), /* variable shift costs */
  COSTS_N_BYTES (3), /* constant shift costs */
  {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3), /* cost of movsx */
  COSTS_N_BYTES (3), /* cost of movzx */
  0, /* "large" insn */
  2, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {2, 2, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 2}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {2, 2, 2}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  3, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {3, 3}, /* cost of storing MMX registers
	     in SImode and DImode */
  3, /* cost of moving SSE register */
  {3, 3, 3}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {3, 3, 3}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of l1 cache */
  0, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_BYTES (2), /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2), /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2), /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2), /* cost of FABS instruction.  */
  COSTS_N_BYTES (2), /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  1, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  1, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
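
/* Note (an assumption about surrounding context): one of these tables
   is selected at option-processing time and exported through the
   global `ix86_cost' pointer, so rtx-cost queries for the active
   -mtune target read the matching column below.  */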

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
   COSTS_N_INSNS (6), /* HI */
   COSTS_N_INSNS (6), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1), /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  3, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of l1 cache */
  0, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (23), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22), /* cost of FABS instruction.  */
  COSTS_N_INSNS (24), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = { /* 486 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  3, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  4, /* size of l1 cache.  486 has 8kB cache
	shared for code and data, so 4kB is
	not really precise.  */
  4, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3), /* cost of FABS instruction.  */
  COSTS_N_INSNS (3), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83), /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  6, /* MOVE_RATIO */
  6, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  8, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  8, /* size of l1 cache.  */
  8, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1), /* cost of FABS instruction.  */
  COSTS_N_INSNS (1), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70), /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  6, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  8, /* size of l1 cache.  */
  256, /* size of l2 cache */
  32, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (2), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (7), /* SI */
   COSTS_N_INSNS (7), /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* MOVE_RATIO */
  1, /* cost for loading QImode using movzbl */
  {1, 1, 1}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {1, 1, 1}, /* cost of storing integer registers */
  1, /* cost of reg,reg fld/fst */
  {1, 1, 1}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 6, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */

  1, /* cost of moving MMX register */
  {1, 1}, /* cost of loading MMX registers
	     in SImode and DImode */
  {1, 1}, /* cost of storing MMX registers
	     in SImode and DImode */
  1, /* cost of moving SSE register */
  {1, 1, 1}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {1, 1, 1}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  1, /* MMX or SSE register to integer */
  64, /* size of l1 cache.  */
  128, /* size of l2 cache.  */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1), /* cost of FABS instruction.  */
  COSTS_N_INSNS (1), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54), /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (3), /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  4, /* MOVE_RATIO */
  3, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  6, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  32, /* size of l2 cache.  Some models
	 have integrated l2 cache, but
	 optimizing for k6 is not important
	 enough to worry about that.  */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (2), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
   COSTS_N_INSNS (5), /* HI */
   COSTS_N_INSNS (5), /* SI */
   COSTS_N_INSNS (5), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 6}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  5, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 3, 6}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost.  */
  2, /* scalar load_cost.  */
  2, /* scalar_store_cost.  */
  5, /* vec_stmt_cost.  */
  0, /* vec_to_scalar_cost.  */
  2, /* scalar_to_vec_cost.  */
  2, /* vec_align_load_cost.  */
  3, /* vec_unalign_load_cost.  */
  3, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  2, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
			 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
			 1/1  1/1 */
  64, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost.  */
  2, /* scalar load_cost.  */
  2, /* scalar_store_cost.  */
  6, /* vec_stmt_cost.  */
  0, /* vec_to_scalar_cost.  */
  2, /* scalar_to_vec_cost.  */
  2, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  2, /* vec_store_cost.  */
  2, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {5, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {5, 5, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 4, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 4}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 4}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  2, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
			 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
			 1/1  1/1 */
  16, /* size of l1 cache.  */
  2048, /* size of l2 cache.  */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52), /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6, /* scalar_stmt_cost.  */
  4, /* scalar load_cost.  */
  4, /* scalar_store_cost.  */
  6, /* vec_stmt_cost.  */
  0, /* vec_to_scalar_cost.  */
  2, /* scalar_to_vec_cost.  */
  4, /* vec_align_load_cost.  */
  4, /* vec_unalign_load_cost.  */
  4, /* vec_store_cost.  */
  2, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {5, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {5, 5, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 4, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 4}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 4}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  2, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
			 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
			 1/1  1/1 */
  16, /* size of l1 cache.  */
  2048, /* size of l2 cache.  */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52), /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6, /* scalar_stmt_cost.  */
  4, /* scalar load_cost.  */
  4, /* scalar_store_cost.  */
  6, /* vec_stmt_cost.  */
  0, /* vec_to_scalar_cost.  */
  2, /* scalar_to_vec_cost.  */
  4, /* vec_align_load_cost.  */
  4, /* vec_unalign_load_cost.  */
  4, /* vec_store_cost.  */
  2, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
			 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
			 1/1  1/1 */
  32, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  100, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost.  */
  2, /* scalar load_cost.  */
  2, /* scalar_store_cost.  */
  6, /* vec_stmt_cost.  */
  0, /* vec_to_scalar_cost.  */
  2, /* scalar_to_vec_cost.  */
  2, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  2, /* vec_store_cost.  */
  2, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (3), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (4), /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  6, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  12, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  10, /* MMX or SSE register to integer */
  8, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  COSTS_N_INSNS (5), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43), /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  3, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  6, /* cost of moving MMX register */
  {12, 12}, /* cost of loading MMX registers
	       in SImode and DImode */
  {12, 12}, /* cost of storing MMX registers
	       in SImode and DImode */
  6, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {12, 12, 12}, /* cost of storing SSE registers
		   in SImode, DImode and TImode */
  8, /* MMX or SSE register to integer */
  8, /* size of l1 cache.  */
  1024, /* size of l2 cache.  */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3), /* cost of FABS instruction.  */
  COSTS_N_INSNS (3), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44), /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea takes 2 cycles or more.
     With a higher cost, however, our current implementation of synth_mult
     uses unnecessary temporary registers, causing regressions on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
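/* The memcpy/memset descriptors in the cost tables above are read as
   follows: the leading "libcall" is the algorithm used when the size is
   not known at compile time, and each {max, alg} pair selects ALG for
   known sizes up to MAX bytes, with max == -1 terminating the list (any
   larger size).  Each cost table carries a pair of such descriptors per
   operation, one for 32-bit and one for 64-bit code; DUMMY_STRINGOP_ALGS
   fills the unused slot.  A minimal sketch of the lookup, assuming the
   stringop_algs layout from i386.h (the real decision logic in this file
   also weighs alignment and -minline-* options):

     static enum stringop_alg
     lookup_stringop_alg (const struct stringop_algs *algs,
			  HOST_WIDE_INT size)
     {
       int i;
       if (size < 0)                  (size unknown at compile time)
	 return algs->unknown_size;
       for (i = 0; algs->size[i].max != -1; i++)
	 if (size <= algs->size[i].max)
	   return algs->size[i].alg;
       return algs->size[i].alg;      (the max == -1 catch-all entry)
     }
*/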
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
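/* For illustration, each m_* macro is a single bit keyed by the processor
   enum, so testing whether a tuning applies to the active CPU is one AND.
   A minimal sketch, mirroring the loops further below that fill
   ix86_tune_features[] and ix86_arch_features[]:

     unsigned int tune_mask = 1u << ix86_tune;
     unsigned char use_push_memory
       = !!(initial_ix86_tune_features[X86_TUNE_PUSH_MEMORY] & tune_mask);
*/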
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create
   ix86_tune_features based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into the P4
     based on simulation results.  But after the P4 shipped, no performance
     benefit was observed with branch hints; they also increase code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */
  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well: they can be introduced via subregs synthesized by combine,
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and conflicts with the partial-reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,
  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,
  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_SINGLE_POP: Enable if a single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if a double pop insn is preferred
     over esp addition.  */

  /* X86_TUNE_SINGLE_PUSH: Enable if a single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if a double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,
  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units and K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit so as to allow register renaming on 128bit
     SSE units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,
  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and
     dependencies are resolved on SSE register parts instead of whole
     registers, so we may maintain just the lower part of scalar values
     in the proper format, leaving the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but the 386 and 486 do HImode multiply
     faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
     a vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path
     on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */

  /* X86_TUNE_OPT_AGU: Optimize for the Address Generation Unit.  This flag
     will impact LEA instruction selection.  */

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
};
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for
     pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
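/* These masks are consumed during option overriding: if the tuned-for CPU
   is in the mask and the user did not set the option explicitly, the
   corresponding -mavx256-split-unaligned-* flag is turned on.  Roughly
   (a sketch; ix86_tune_mask is computed from ix86_tune below):

     if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)
	 && (x86_avx256_split_unaligned_load & ix86_tune_mask))
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
*/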
/* In case the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
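/* A sketch of the intended use (the actual check lives in the frame
   layout code, which compares a cached insn count for the function
   against this threshold):

     use_fast_prologue_epilogue = (count < FAST_PROLOGUE_INSN_COUNT);
*/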
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
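/* For illustration, REGNO_REG_CLASS simply indexes this table; per the
   "ax, dx, cx, bx" row above, regno 1 is %dx, so:

     enum reg_class cl = regclass_map[1];    (yields DREG)
*/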
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
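/* For illustration, a debug-info emitter translates GCC register numbers
   through this map; using the correspondence documented above, %ecx is
   GCC regno 2 and DWARF register 1 under the SVR4 numbering:

     int dwarf_regno = svr4_dbx_register_map[2];    (yields 1, %ecx)
*/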
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [saved static chain]			<- if ix86_static_chain_on_stack
   [saved frame pointer]		<- if frame_pointer_needed
					<- HARD_FRAME_POINTER
					<- sse_regs_save_offset
   [va_arg registers]			|
   [padding2]				| = to_allocate
*/
struct ix86_frame
{
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit the prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the sse prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class;
   gcc just uses an SF or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_COMPLEX_X87_CLASS,
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};

static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
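/* For illustration (a sketch of how option handling below consumes this
   table): the active row is selected by processor enum, e.g.

     ix86_cost = processor_target_table[ix86_tune].cost;
     align_functions = processor_target_table[ix86_tune].align_func;
*/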
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options are matched first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-m64",		OPTION_MASK_ISA_64BIT },
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
  };
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };
  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;

  memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }
  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  Note: for x86 with "hotfix", this is sorried.  */

static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */

static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
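/* Any other caller of ix86_target_string must free the result the same
   way; a minimal sketch, with arguments as in ix86_debug_options above:

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);
     if (s)
       {
	 fputs (s, stderr);
	 free (s);
       }
*/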
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
/* if this reaches 64, need to widen struct pta flags below */
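/* Note that PTA_RTM already occupies bit 32, which is why the flags field
   of the alias table below is unsigned HOST_WIDE_INT rather than unsigned
   int.  A further feature bit would be added as, e.g. (hypothetical name):

     #define PTA_NEWISA		(HOST_WIDE_INT_1 << 33)
*/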
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2949 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2950 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2951 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2952 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2953 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2954 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2955 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2956 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2957 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2958 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2959 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2960 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2961 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2963 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2965 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2966 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2967 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2968 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2969 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2970 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2971 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2972 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2973 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2974 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2975 | PTA_CX16
| PTA_NO_SAHF
},
2976 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2977 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2978 | PTA_SSSE3
| PTA_CX16
},
2979 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2980 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2981 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
2982 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2983 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2984 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2985 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
2986 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2987 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2988 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2989 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2990 | PTA_RDRND
| PTA_F16C
},
2991 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2992 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2993 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2994 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2995 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2996 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
},
2997 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2998 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2999 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3000 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3001 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3002 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3003 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3004 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3005 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3006 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3007 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3008 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3009 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3010 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3011 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3012 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3013 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3014 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3015 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3016 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3017 {"k8", PROCESSOR_K8
, CPU_K8
,
3018 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3019 | PTA_SSE2
| PTA_NO_SAHF
},
3020 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3021 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3022 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3023 {"opteron", PROCESSOR_K8
, CPU_K8
,
3024 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3025 | PTA_SSE2
| PTA_NO_SAHF
},
3026 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3027 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3028 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3029 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3030 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3031 | PTA_SSE2
| PTA_NO_SAHF
},
3032 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3033 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3034 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3035 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3036 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3037 | PTA_SSE2
| PTA_NO_SAHF
},
3038 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3039 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3040 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3041 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3042 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3043 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3044 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3045 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3046 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3047 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3048 | PTA_XOP
| PTA_LWP
},
3049 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3050 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3051 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3052 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3053 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3055 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3056 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3057 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3058 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3059 0 /* flags are only used for -march switch. */ },
3060 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3061 PTA_64BIT
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;		/* option name */
      unsigned int mask;		/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (TARGET_X32)
    ix86_isa_flags |= OPTION_MASK_ISA_64BIT;

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3272 for (i
= 0; i
< pta_size
; i
++)
3273 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3275 ix86_schedule
= processor_alias_table
[i
].schedule
;
3276 ix86_arch
= processor_alias_table
[i
].processor
;
3277 /* Default cpu tuning to the architecture. */
3278 ix86_tune
= ix86_arch
;
3280 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
	      & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
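  /* Illustrative note (editor's sketch, not from the original sources):
     each |= above is guarded by ix86_isa_flags_explicit, so an alias-table
     entry only turns an ISA on when the user has not set it explicitly.
     For a hypothetical command line

	 gcc -march=corei7 -mno-sse4.2 foo.c

     the table entry enables MMX/SSE/SSE2 and friends, while SSE4.2 stays
     off: -mno-sse4.2 records OPTION_MASK_ISA_SSE4_2 in
     ix86_isa_flags_explicit, and the guard skips it.  */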
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;

  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }

  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
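  /* Illustrative note (editor's sketch, not from the original sources):
     both option arguments above are exponents, so the boundary in bits is
     (1 << arg) * BITS_PER_UNIT.  With the usual BITS_PER_UNIT of 8:

	 -mpreferred-stack-boundary=2  ->  (1 << 2) * 8 =  32 bits =  4 bytes
	 -mpreferred-stack-boundary=4  ->  (1 << 4) * 8 = 128 bits = 16 bytes

     which is why the 64-bit minimum of 4 matches the 16-byte alignment
     required by the x86-64 psABI.  */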
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it. */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  /* Use external vectorized library in vectorizing intrinsics. */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }

  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, RDRAND intrinsic
     expands to a sequence that includes conditional move. */
  if (TARGET_SSE || TARGET_RDRND)
    TARGET_CMOVE = 1;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;
  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;

  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
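  /* Illustrative note (editor's sketch, not from the original sources):
     the Pmode == DImode tests above are not redundant with TARGET_64BIT
     because of the x32 ABI (-mx32), where TARGET_64BIT is set but
     pointers are 32 bits wide; there Pmode is SImode, so the *_si
     generator functions must be selected even in 64-bit mode.  */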
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }

  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable vzeroupper optimization for
	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }

  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
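  /* Illustrative note (editor's sketch, not from the original sources):
     the strtok loop above splits the -mrecip= argument on commas and
     folds each token into recip_mask, so a hypothetical

	 gcc -mrecip=default foo.c

     sets all of RECIP_MASK_ALL, while an unrecognized token such as

	 gcc -mrecip=frobnicate foo.c

     exits the recip_options scan with i == ARRAY_SIZE (recip_options)
     and produces the "unknown option for -mrecip=%s" error.  */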
  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}

/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}

/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}

/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}

/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}

/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}

/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively go
   over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
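/* Illustrative note (editor's sketch, not from the original sources):
   the parser above takes a comma-separated string and understands a
   "no-" prefix for the isa and flag entries, so user code such as

       __attribute__((target("sse4.2,no-avx")))
       int popcnt32 (unsigned x) { return __builtin_popcount (x); }

   enables OPTION_MASK_ISA_SSE4_2 and disables OPTION_MASK_ISA_AVX for
   this one function via ix86_handle_option, much as -msse4.2 -mno-avx
   would do for the whole translation unit.  */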
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}

/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}

/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* The callee's isa options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
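/* Illustrative note (editor's sketch, not from the original sources):
   given the subset test above, in

       __attribute__((target("sse2"))) static int f (int x) { return x + 1; }
       __attribute__((target("sse4.2"))) int g (int x) { return f (x); }

   f may be inlined into g when the remaining options match, since g's
   ISA flags contain all of f's; the reverse direction fails the
   (caller & callee) != callee check and is rejected.  */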
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
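/* Illustrative note (editor's sketch, not from the original sources):
   under -mcmodel=medium a definition such as

       char big[1 << 20];

   whose size exceeds ix86_section_threshold (-mlarge-data-threshold)
   is classified as large data by the function above, so the section
   hooks below place it in .ldata/.lbss instead of .data/.bss.  */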
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
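/* Illustrative note (editor's sketch, not from the original sources):
   for a large common symbol under -mcmodel=medium the function above
   emits something like

       .largecomm	big,1048576,32

   instead of the usual COMMON_ASM_OP (".comm"); the final operand is
   align / BITS_PER_UNIT, i.e. the alignment in bytes.  */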
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}

/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
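/* Illustrative note (editor's sketch, not from the original sources):
   the STACK_REG_P test above is what blocks a sibcall whenever an x87
   stack adjustment would be skipped; e.g. on 32-bit a function whose
   return value comes back in %st(0) cannot tail-jump to one returning
   in %eax, since the two rtx locations fail rtx_equal_p and the callee
   would leave the x87 stack in the wrong state for our caller.  */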
4924 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4925 and "sseregparm" calling convention attributes;
4926 arguments as in struct attribute_spec.handler. */
4929 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4931 int flags ATTRIBUTE_UNUSED
,
4934 if (TREE_CODE (*node
) != FUNCTION_TYPE
4935 && TREE_CODE (*node
) != METHOD_TYPE
4936 && TREE_CODE (*node
) != FIELD_DECL
4937 && TREE_CODE (*node
) != TYPE_DECL
)
4939 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4941 *no_add_attrs
= true;
4945 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4946 if (is_attribute_p ("regparm", name
))
4950 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4952 error ("fastcall and regparm attributes are not compatible");
4955 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4957 error ("regparam and thiscall attributes are not compatible");
4960 cst
= TREE_VALUE (args
);
4961 if (TREE_CODE (cst
) != INTEGER_CST
)
4963 warning (OPT_Wattributes
,
4964 "%qE attribute requires an integer constant argument",
4966 *no_add_attrs
= true;
4968 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4970 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4972 *no_add_attrs
= true;
4980 /* Do not warn when emulating the MS ABI. */
4981 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4982 && TREE_CODE (*node
) != METHOD_TYPE
)
4983 || ix86_function_type_abi (*node
) != MS_ABI
)
4984 warning (OPT_Wattributes
, "%qE attribute ignored",
4986 *no_add_attrs
= true;
4990 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4991 if (is_attribute_p ("fastcall", name
))
4993 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4995 error ("fastcall and cdecl attributes are not compatible");
4997 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4999 error ("fastcall and stdcall attributes are not compatible");
5001 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5003 error ("fastcall and regparm attributes are not compatible");
5005 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5007 error ("fastcall and thiscall attributes are not compatible");
5011 /* Can combine stdcall with fastcall (redundant), regparm and
5013 else if (is_attribute_p ("stdcall", name
))
5015 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5017 error ("stdcall and cdecl attributes are not compatible");
5019 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5021 error ("stdcall and fastcall attributes are not compatible");
5023 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5025 error ("stdcall and thiscall attributes are not compatible");
5029 /* Can combine cdecl with regparm and sseregparm. */
5030 else if (is_attribute_p ("cdecl", name
))
5032 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5034 error ("stdcall and cdecl attributes are not compatible");
5036 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5038 error ("fastcall and cdecl attributes are not compatible");
5040 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5042 error ("cdecl and thiscall attributes are not compatible");
5045 else if (is_attribute_p ("thiscall", name
))
5047 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5048 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5050 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5052 error ("stdcall and thiscall attributes are not compatible");
5054 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5056 error ("fastcall and thiscall attributes are not compatible");
5058 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5060 error ("cdecl and thiscall attributes are not compatible");
5064 /* Can combine sseregparm with all attributes. */
5069 /* The transactional memory builtins are implicitly regparm or fastcall
5070 depending on the ABI. Override the generic do-nothing attribute that
5071 these builtins were declared with, and replace it with one of the two
5072 attributes that we expect elsewhere. */
5075 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5076 tree args ATTRIBUTE_UNUSED
,
5077 int flags ATTRIBUTE_UNUSED
,
5082 /* In no case do we want to add the placeholder attribute. */
5083 *no_add_attrs
= true;
5085 /* The 64-bit ABI is unchanged for transactional memory. */
5089 /* ??? Is there a better way to validate 32-bit windows? We have
5090 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5091 if (CHECK_STACK_LIMIT
> 0)
5092 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5095 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5096 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5098 decl_attributes (node
, alt
, flags
);
5103 /* This function determines from TYPE the calling-convention. */
5106 ix86_get_callcvt (const_tree type
)
5108 unsigned int ret
= 0;
5113 return IX86_CALLCVT_CDECL
;
5115 attrs
= TYPE_ATTRIBUTES (type
);
5116 if (attrs
!= NULL_TREE
)
5118 if (lookup_attribute ("cdecl", attrs
))
5119 ret
|= IX86_CALLCVT_CDECL
;
5120 else if (lookup_attribute ("stdcall", attrs
))
5121 ret
|= IX86_CALLCVT_STDCALL
;
5122 else if (lookup_attribute ("fastcall", attrs
))
5123 ret
|= IX86_CALLCVT_FASTCALL
;
5124 else if (lookup_attribute ("thiscall", attrs
))
5125 ret
|= IX86_CALLCVT_THISCALL
;
5127 /* Regparam isn't allowed for thiscall and fastcall. */
5128 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5130 if (lookup_attribute ("regparm", attrs
))
5131 ret
|= IX86_CALLCVT_REGPARM
;
5132 if (lookup_attribute ("sseregparm", attrs
))
5133 ret
|= IX86_CALLCVT_SSEREGPARM
;
5136 if (IX86_BASE_CALLCVT(ret
) != 0)
5140 is_stdarg
= stdarg_p (type
);
5141 if (TARGET_RTD
&& !is_stdarg
)
5142 return IX86_CALLCVT_STDCALL
| ret
;
5146 || TREE_CODE (type
) != METHOD_TYPE
5147 || ix86_function_type_abi (type
) != MS_ABI
)
5148 return IX86_CALLCVT_CDECL
| ret
;
5150 return IX86_CALLCVT_THISCALL
;
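/* Illustrative note (editor's sketch, not from the original sources):
   for 32-bit code a prototype such as

       int __attribute__((stdcall)) f (int a, int b);

   yields IX86_CALLCVT_STDCALL from the lookup above, while a prototype
   with no convention attribute falls through to IX86_CALLCVT_CDECL
   (or to IX86_CALLCVT_STDCALL under -mrtd, unless it is stdarg).  */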
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
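/* Illustrative note (editor's sketch, not from the original sources):
   on 32-bit a declaration such as

       int __attribute__((regparm(3))) add3 (int a, int b, int c);

   makes the function above return 3, so the arguments travel in %eax,
   %edx and %ecx; a fastcall prototype yields 2 and thiscall yields 1,
   matching the ccvt branches above.  */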
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
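/* Illustrative note (editor's sketch, not from the original sources):
   for a 32-bit prototype such as

       int __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and the function above returns 8, so f ends in "ret $8"
   and the caller does not pop the arguments itself; a varargs
   prototype fails the !stdarg_p test and pops nothing.  */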
5381 /* Argument support functions. */
5383 /* Return true when register may be used to pass function parameters. */
5385 ix86_function_arg_regno_p (int regno
)
5388 const int *parm_regs
;
5393 return (regno
< REGPARM_MAX
5394 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5396 return (regno
< REGPARM_MAX
5397 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5398 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5399 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5400 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5405 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5410 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5411 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5415 /* TODO: The function should depend on current function ABI but
5416 builtins.c would need updating then. Therefore we use the
5419 /* RAX is used as hidden argument to va_arg functions. */
5420 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5423 if (ix86_abi
== MS_ABI
)
5424 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5426 parm_regs
= x86_64_int_parameter_registers
;
5427 for (i
= 0; i
< (ix86_abi
== MS_ABI
5428 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5429 if (regno
== parm_regs
[i
])
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fndecl, specifying the
   call abi used.  */

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     instead.  FIXME: once type system is fixed, we won't need this
     code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
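/* Illustrative example (added for illustration, not psABI text): a GNU
   vector type such as "int __attribute__ ((vector_size (16)))" gets
   V4SImode as its natural mode even when the SSE ISA is not enabled,
   so the ABI slot is chosen as if the vector ISA were available rather
   than following whatever fallback mode the middle-end picked to
   implement the type.  */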
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
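/* Worked example (added for illustration, not part of the psABI text):
   for "struct { float f; int i; }" both fields land in the same
   eightbyte.  The float alone would classify as SSESF and the int
   alone as INTEGERSI; rule #4 above merges the pair to INTEGERSI, so
   the whole struct travels in a general-purpose register.  */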
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
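/* Worked example (added for illustration): "struct { double d; long l; }"
   occupies two eightbytes; classify_argument yields
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS,
   so the struct is passed in one SSE register and one integer
   register.  */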
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we
	     don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}

static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
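/* Example (added for illustration): under the Windows x64 convention
   checked above, "struct { char c[3]; }" has size 3, which is not 1,
   2, 4 or 8, so it is passed by reference; so is a 16-byte __m128,
   whereas an 8-byte struct travels directly in a register.  */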
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}

static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}

static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function saves it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing a structure, verify that it is a consecutive
	 block on the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
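
/* A minimal, self-contained sketch (not part of GCC) of the overflow-area
   alignment arithmetic built above, assuming ALIGN is a power of two:

	#include <stdint.h>

	static inline uintptr_t
	align_up (uintptr_t p, uintptr_t align)
	{
	  // Round P up to the next multiple of ALIGN: adding ALIGN - 1
	  // and masking with -ALIGN clears the low bits.
	  return (p + align - 1) & -align;
	}

   e.g. align_up (0x1009, 16) == 0x1010.  This is exactly the
   fold_build_pointer_plus_hwi / BIT_AND_EXPR pair emitted above.  */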
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
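
/* Worked examples (illustrative): CONST1_RTX (XFmode) yields 2 (fld1);
   the XFmode representation of pi yields 7 (fldpi) when the extended
   constants are enabled; -0.0 and -1.0 yield 8 and 9 and are later
   split into fldz;fchs and fld1;fchs respectively.  */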
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    return 2;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      if (TARGET_AVX)
	return "vpcmpeqd\t%0, %0, %0";
      else
	return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
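
/* Background for the templates above (illustrative): XORing a register
   with itself yields all-zero bits without touching memory, and PCMPEQD
   of a register with itself compares equal in every element, yielding
   all-one bits:

	xorps	%xmm0, %xmm0	# xmm0 = all zeros
	pcmpeqd	%xmm0, %xmm0	# xmm0 = all ones

   Either form avoids a constant-pool load.  */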
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
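
/* Example (illustrative): for (plus:SI (reg:SI 0) (symbol_ref:SI ("x"))),
   the 'e' operand walk recurses into both operands, finds the SYMBOL_REF
   and returns true; for (plus:SI (reg:SI 0) (const_int 4)) it returns
   false.  */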
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
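
/* The emitted thunk is tiny; for %ebx it is roughly (illustrative):

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx	# return address = address after call
		ret

   A caller then materializes its PIC base with

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the add is emitted by output_set_got below.  */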
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}
/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
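
/* The RTL produced by gen_push and gen_pop has the shape (illustrative,
   64-bit):

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))	;; push
	(set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))	;; pop

   Folding the stack-pointer adjustment into the address mode is what
   lets these match the single-insn hardware push/pop encodings.  */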
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
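
/* Summary of the layout computed above, as offsets from the CFA growing
   downward (an illustrative sketch, not a normative diagram):

	return address			offset = UNITS_PER_WORD
	pushed static chain		(if ix86_static_chain_on_stack)
	saved frame pointer		hard_frame_pointer_offset
	integer register save area	reg_save_offset
	SSE register save area		sse_reg_save_offset (16-aligned)
	va_arg register save area
	local variables			frame_pointer_offset
	outgoing arguments
					stack_pointer_offset  */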
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static unsigned int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  unsigned int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
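
/* Worked examples for the encoding lengths above (illustrative):

	choose_baseaddr_len (SP_REG, 0)	  == 1	// SIB byte only
	choose_baseaddr_len (BP_REG, 0)	  == 1	// EBP forces a disp8
	choose_baseaddr_len (AX_REG, -64) == 1	// disp8
	choose_baseaddr_len (AX_REG, 512) == 4	// disp32

   The caller below uses these lengths only to break ties between base
   registers, so the exact byte counts matter less than their order.  */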
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      unsigned int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in its epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in its epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  /* We always use R11 in 64-bit mode.  */
  if (TARGET_64BIT)
    regno = R11_REG;
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (stack_pointer_rtx, -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));


      /* Step 3: the loop

	   while (SP != LAST_ADDR)
	     {
	       SP = SP + PROBE_INTERVAL
	       probe at SP
	     }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	   while (TEST_ADDR != LAST_ADDR)
	     {
	       TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	       probe at TEST_ADDR
	     }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */
static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (current_function_is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && current_function_is_leaf
      && flag_omit_frame_pointer
      && current_function_sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	{
	  rtx insn;
	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		crtl->stack_realign_needed = stack_realign;
		crtl->stack_realign_finalized = true;
		return;
	      }
	}

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

static void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}
      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
	{
	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_offset += allocate;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, tmp_reg;

	      gcc_assert (Pmode == DImode);
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
					       pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is setup but after reload it turns out stack realign
	 isn't necessary; here we will emit a prologue to setup DRAP
	 without stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
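/* Illustrative only: for a simple 32-bit function with a frame pointer,
   one saved call-clobbered register and no realignment, the insns
   emitted above correspond to something like

	push	%ebp
	mov	%esp, %ebp
	push	%ebx
	sub	$N, %esp

   where N is frame.stack_pointer_offset - m->fs.sp_offset; the DRAP,
   probing, ms_hook and SEH paths produce different sequences.  */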
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
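/* The state updates above follow from the semantics of the x86 LEAVE
   instruction, which is equivalent to

	mov	%ebp, %esp
	pop	%ebp

   so afterwards the stack pointer is valid again at fp_offset minus
   one word and the frame pointer is dead.  */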
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
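/* vzeroupper clears the upper 128 bits of all YMM registers, avoiding
   the AVX/SSE transition penalty in callers compiled for legacy SSE.
   It must be skipped when the caller expects a 256-bit AVX return
   value, since that value lives in those upper bits.  */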
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (current_function_sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!m->fs.sp_valid)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
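/* Illustrative only: the two common 32-bit epilogue shapes selected
   above are

	mov	-4(%ebp), %ebx		# restore_regs_via_mov, then
	leave				# (or pop-based frame teardown)
	ret

   versus the pop-based form

	add	$N, %esp
	pop	%ebx
	pop	%ebp
	ret	$M			# ret $M only when pops_args is set

   with the CFA notes keeping the unwind info exact at each step.  */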
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   in between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
				    stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
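/* Sketch of the control flow emitted above (illustrative):

	cmp	<TCB stack limit>, %esp_or_scratch
	jae	.Lhave_stack		# the likely branch
	<pass allocate / args_size>	# pushes, or %r10/%r11 in 64-bit mode
	call	__morestack
	ret				# consumed by __morestack's return
					# prediction; flow continues below
   .Lhave_stack:
	<regular prologue follows>

   See libgcc/config/i386/morestack.S for the callee side.  */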
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if op is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = XEXP (addr, 0);

	  /* Adjust SUBREGs.  */
	  if (GET_CODE (addr) == SUBREG
	      && GET_MODE (SUBREG_REG (addr)) == SImode)
	    addr = SUBREG_REG (addr);
	  else if (GET_MODE (addr) == DImode)
	    addr = gen_rtx_SUBREG (SImode, addr, 0);
	  else if (GET_MODE (addr) != VOIDmode)
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case CONST:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
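/* The decomposition above targets the canonical x86 effective address

	base + index*scale + disp	e.g. 8(%ebx,%ecx,4) or foo(%eax)

   with scale in {1,2,4,8} and disp a constant or symbolic displacement
   (plus an optional %fs/%gs segment for TLS).  The special cases above
   rewrite forms the encoding cannot express directly, such as a bare
   %ebp/%r13 base or an index-only scaled address.  */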
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
              && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here, since they would
         exceed the distances allowed for GOT tables.  We should not need
         them anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
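
/* Illustrative examples (a sketch, not exhaustive): on ia32 with
   flag_pic, a displacement of the form

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))

   is accepted for local data, while a bare (symbol_ref "x") is not,
   because it would bypass the GOT machinery entirely.  */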
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
                                enum machine_mode mode ATTRIBUTE_UNUSED,
                                int opnum, int type,
                                int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
                       (reg:DI 97))
              (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating an invalid memory address RTX.

     The code below reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
        {
          push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
                       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          something_reloaded = true;
        }

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
        {
          push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
                       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          something_reloaded = true;
        }

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
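
/* Roughly, for the TLS address cited in the comment above, only the
   inner pseudo (base register 97) fails the strict check, so a single
   push_reload of that component is enough; reloading the whole PLUS
   would needlessly split the UNSPEC_TP part into its own register.  */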
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                           rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  /* Since a constant address in x32 is sign-extended to 64bit,
     we have to prevent addresses from 0x80000000 to 0xffffffff.  */
  if (TARGET_X32
      && CONST_INT_P (addr)
      && INTVAL (addr) < 0)
    return false;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
        reg = SUBREG_REG (base);
      else
        /* Base is not a register.  */
        return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
        reg = SUBREG_REG (index);
      else
        /* Index is not a register.  */
        return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since they are always
             64bit when used.  While the ABI also specifies 32bit
             relocations, we don't produce them at all and use
             IP-relative addressing instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          case UNSPEC_STACK_CHECK:
            gcc_assert (flag_split_stack);
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* displacement must be referenced via non_lazy_pointer */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like `return *(&a + i);'.

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to an lea when
             the output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in an lea, disabling this test seems to
             be the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
    }

  /* Everything looks valid.  */
  return true;
}
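
/* As a sketch of the canonical form this recognizes: an address like

     (plus:SI (plus:SI (mult:SI (reg:SI idx) (const_int 4))
                       (reg:SI base))
              (const_int 16))

   decomposes into base/index/scale/disp and is accepted, whereas a
   scale of e.g. 3 or a scale without an index is rejected above.  */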
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */
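
/* As an illustration of the two cases (a sketch; the exact RTL shapes
   are produced below): a global `x' becomes a load

     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))

   while a local `y' becomes the address computation

     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "y")] UNSPEC_GOTOFF))).  */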
static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      tmpreg = gen_reg_rtx (Pmode);
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
        {
          new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                         tmpreg, 1, OPTAB_DIRECT);
        }
      else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
        {
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels on VxWorks;
              see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
        {
          if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
            return legitimize_dllimport_symbol (addr, true);
          if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
              && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
            {
              rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
                                                   true);
              return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
            }
        }

      /* For x64 PE-COFF there is no GOT table.  So we use the address
         directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                    UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly, otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new_rtx));
          new_rtx = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new_rtx = reg;
            }
          else
            new_rtx = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
                  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                                          new_rtx);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new_rtx);
                      new_rtx = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new_rtx = gen_rtx_PLUS (Pmode,
                                              force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new_rtx = legitimize_pic_address (XEXP (addr, 1),
                                                base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                new_rtx = plus_constant (base, INTVAL (new_rtx));
              else
                {
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
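
/* For example (a sketch): on 32-bit GNU/Linux the UNSPEC_TP above is
   ultimately emitted as an access through the %gs segment (%fs on
   64-bit), so the thread pointer itself never occupies a general
   register unless TO_REG forces a copy.  */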
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          tp = get_thread_pointer (Pmode, true);
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
                                                              caddr));
              insns = get_insns ();
              end_sequence ();

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          rtx tmp = ix86_tls_module_base ();

          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (Pmode, true);
          set_unique_reg_note (get_last_insn (), REG_EQUAL,
                               gen_rtx_MINUS (Pmode, tmp, tp));
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
                                                                  caddr));
              insns = get_insns ();
              end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          if (TARGET_SUN_TLS)
            {
              /* The Sun linker took the AMD64 TLS spec literally
                 and can only handle %rax as destination of the
                 initial executable code sequence.  */

              dest = gen_reg_rtx (Pmode);
              emit_insn (gen_tls_initial_exec_64_sun (dest, x));
              return dest;
            }

          /* Generate DImode references to avoid %fs:(%reg32)
             problems and linker IE->LE relaxation bug.  */
          tp_mode = DImode;
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
        off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (tp_mode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (tp_mode, off);
          return gen_rtx_PLUS (tp_mode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (Pmode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
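
/* Illustrative asm (a hedged sketch of typical -fpic ia32 output; the
   exact sequences are defined by the patterns emitted above):

     global dynamic:  leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     initial exec:    movl x@gotntpoff(%ebx), %reg; ... %gs:(%reg)
     local exec:      movl %gs:0, %reg; ... x@ntpoff(%reg)  */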
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
                           VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
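
/* For example (a sketch): for a dllimported `foo' the decl built above
   has DECL_RTL (mem (symbol_ref "*__imp_foo")) -- or "*__imp__foo"
   when user symbols carry an underscore prefix -- i.e. a load of the
   import-table slot rather than the symbol itself.  */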
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
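
/* A small example of the canonicalizations performed below (a sketch):
   (plus (ashift (reg) (const_int 2)) (reg)) is rewritten to
   (plus (mult (reg) (const_int 4)) (reg)), which matches the scaled
   index form that ix86_legitimate_address_p recognizes.  */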
static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
                                      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
        return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
          && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
        {
          rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
          return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
        }
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (CONST_INT_P (XEXP (x, 1)))
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && REG_P (XEXP (x, 1))
          && REG_P (XEXP (x, 0)))
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (REG_P (XEXP (x, 0)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 1) = temp;
          return x;
        }

      else if (REG_P (XEXP (x, 1)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will
             output the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
          && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           TARGET_PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (CONST_INT_P (XEXP (x, 1)));
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
        {
          bool f = i386_asm_output_addr_const_extra (file, x);
          gcc_assert (f);
          break;
        }

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLTOFF:
          fputs ("@PLTOFF", file);
          break;
        case UNSPEC_PCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "(%rip)" : "[rip]", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@gottpoff", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@tpoff", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@tpoff", file);
          else
            fputs ("@ntpoff", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@dtpoff", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                   "@gottpoff(%rip)": "@gottpoff[rip]", file);
          else
            fputs ("@gotntpoff", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
#if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
          machopic_output_function_base_name (file);
          break;
#endif
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
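
/* For instance (a sketch): given
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) this routine
   prints "x@GOTOFF"; under UNSPEC_GOTPCREL in AT&T 64-bit syntax it
   prints "x@GOTPCREL(%rip)".  */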
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
            && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
        idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_MODE (XEXP (x, 0)) == Pmode
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
        {
          rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
          x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
          if (MEM_P (orig_x))
            x = replace_equiv_address_nv (orig_x, x);
          return x;
        }
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
              && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
          || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
        return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
        {
          x = simplify_gen_subreg (GET_MODE (orig_x), x,
                                   GET_MODE (x), 0);
          if (x == NULL_RTX)
            return orig_x;
        }
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
        reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
        reg_addend = XEXP (reg_addend, 0);
      else
        {
          reg_addend = NULL_RTX;
          addend = XEXP (x, 0);
        }
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
          || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
                                                 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
         addend and subtract pic_offset_table_rtx.  This can happen e.g.
         for code like:
           leal (%ebx, %ecx, 4), %ecx
           ...
           movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode,
                                                     copy_rtx (addend),
                                                     pic_offset_table_rtx),
                               result);
      else
        return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
        return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (CONST_INT_P (XEXP (term, 1))
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || (XINT (term, 1) != UNSPEC_GOTPCREL
              && XINT (term, 1) != UNSPEC_PCREL))
        return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
                    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
        suffix = "b";
      else
        gcc_unreachable ();
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "l";
      else
        gcc_unreachable ();
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "ge";
      else
        gcc_unreachable ();
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
        suffix = fp ? "na" : "be";
      else if (mode == CCCmode)
        suffix = fp ? "nb" : "ae";
      else
        gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
              || (REGNO (x) != ARG_POINTER_REGNUM
                  && REGNO (x) != FRAME_POINTER_REGNUM
                  && REGNO (x) != FLAGS_REG
                  && REGNO (x) != FPSR_REG
                  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1: putc ('b', file); break;
        case 2: putc ('w', file); break;
        case 4: putc ('d', file); break;
        case 8: /* no suffix */ break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          reg = "st(0)";
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
        {
          gcc_assert (!duplicated);
          putc ('y', file);
          fputs (hi_reg_name[REGNO (x)] + 1, file);
          return;
        }
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
      else
        fprintf (file, ", %s", reg);
    }
}
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */
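
/* As an example (an editorial sketch): with operands[0] in the ax
   register, "%b0" prints "%al", "%w0" prints "%ax", "%k0" prints
   "%eax" and "%q0" prints "%rax" in AT&T syntax.  */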
static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          {
            const char *name = get_some_local_dynamic_name ();
            if (name == NULL)
              output_operand_lossage ("'%%&' used without any "
                                      "local dynamic TLS references");
            else
              assemble_name (file, name);
            return;
          }

        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (!REG_P (x))
                {
                  putc ('[', file);
                  ix86_print_operand (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          ix86_print_operand (file, x, 0);
          return;

        case 'E':
          /* Wrap address in an UNSPEC to declare special handling.  */
          if (TARGET_64BIT)
            x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

          output_address (x);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              /* Opcodes don't get size suffixes if using Intel opcodes.  */
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 1: putc ('b', file); return;
                case 2: putc ('w', file); return;
                case 4: putc ('l', file); return;
                case 8: putc ('q', file); return;
                default:
                  output_operand_lossage
                    ("invalid operand size for operand code '%c'", code);
                  return;
                }
            }

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            warning
              (0, "non-integer operand used with operand code '%c'", code);
          /* FALLTHRU */

        case 'Z':
          /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 2:
#ifdef HAVE_AS_IX86_FILDS
                  putc ('s', file);
#endif
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
#ifdef HAVE_AS_IX86_FILDQ
                  putc ('q', file);
#else
                  fputs ("ll", file);
#endif
                  return;

                default:
                  break;
                }
            }
          else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            {
              /* 387 opcodes don't get size suffixes
                 if the operands are registers.  */
              if (STACK_REG_P (x))
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 4: putc ('s', file); return;
                case 8: putc ('l', file); return;
                case 12:
                case 16: putc ('t', file); return;
                default:
                  break;
                }
            }
          else
            {
              output_operand_lossage
                ("invalid operand type used with operand code '%c'", code);
              return;
            }

          output_operand_lossage
            ("invalid operand size for operand code '%c'", code);
          return;

        case 'd':
        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 't':
        case 'y':
        case 'x':
        case 'X':
        case 'P':
        case 'p':
          break;

        case 's':
          if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              ix86_print_operand (file, x, 0);
              fputs (", ", file);
            }
          return;

        case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             use completely different names for the comparisons than the
             fp conditional moves.  */
          if (TARGET_AVX)
            {
              switch (GET_CODE (x))
                {
                case EQ: fputs ("eq", file); break;
                case UNEQ: fputs ("eq_us", file); break;
                case LT: fputs ("lt", file); break;
                case UNLT: fputs ("nge", file); break;
                case LE: fputs ("le", file); break;
                case UNLE: fputs ("ngt", file); break;
                case UNORDERED: fputs ("unord", file); break;
                case NE: fputs ("neq", file); break;
                case LTGT: fputs ("neq_oq", file); break;
                case GE: fputs ("ge", file); break;
                case UNGE: fputs ("nlt", file); break;
                case GT: fputs ("gt", file); break;
                case UNGT: fputs ("nle", file); break;
                case ORDERED: fputs ("ord", file); break;
                default:
                  output_operand_lossage ("operand is not a condition code, "
                                          "invalid operand code 'D'");
                  return;
                }
            }
          else
            {
              switch (GET_CODE (x))
                {
                case EQ:
                case UNEQ: fputs ("eq", file); break;
                case LT:
                case UNLT: fputs ("lt", file); break;
                case LE:
                case UNLE: fputs ("le", file); break;
                case UNORDERED: fputs ("unord", file); break;
                case NE:
                case LTGT: fputs ("neq", file); break;
                case UNGE:
                case GE: fputs ("nlt", file); break;
                case UNGT:
                case GT: fputs ("nle", file); break;
                case ORDERED: fputs ("ord", file); break;
                default:
                  output_operand_lossage ("operand is not a condition code, "
                                          "invalid operand code 'D'");
                  return;
                }
            }
          return;

        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: gcc_unreachable ();
                }
              putc ('.', file);
            }
#endif
          return;

        case 'C':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand code "
                                      "'C'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              0, 0, file);
          return;

        case 'F':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand code "
                                      "'F'");
              return;
            }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          /* Check to see if argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              1, 0, file);
          return;

        case 'f':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'f'");
              return;
            }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              1, 1, file);
          return;

        case 'H':
          if (!offsettable_memref_p (x))
            {
              output_operand_lossage ("operand is not an offsettable memory "
                                      "reference, invalid operand "
                                      "code 'H'");
              return;
            }
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case '+':
          {
            rtx x;

            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    bool taken = pred_val > REG_BR_PROB_BASE / 2;
                    bool cputaken
                      = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }

        case 'Y':
          switch (GET_CODE (x))
            {
            case NE: fputs ("neq", file); break;
            case EQ: fputs ("eq", file); break;
            case GE:
            case GEU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
              break;
            case GT:
            case GTU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
              break;
            case LE:
            case LEU: fputs ("le", file); break;
            case LT:
            case LTU: fputs ("lt", file); break;
            case UNORDERED: fputs ("unord", file); break;
            case ORDERED: fputs ("ord", file); break;
            case UNEQ: fputs ("ueq", file); break;
            case UNGE: fputs ("nlt", file); break;
            case UNGT: fputs ("nle", file); break;
            case UNLE: fputs ("ule", file); break;
            case UNLT: fputs ("ult", file); break;
            case LTGT: fputs ("une", file); break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'Y'");
              return;
            }
          return;

        case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
          putc (';', file);
#endif
          return;

        case '@':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('%', file);

          /* The kernel uses a different segment register for performance
             reasons; a system call would not have to trash the userspace
             segment register, which would be expensive.  */
          if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
            fputs ("fs", file);
          else
            fputs ("gs", file);
          return;

        case '~':
          putc (TARGET_AVX2 ? 'i' : 'f', file);
          return;

        case '^':
          if (TARGET_64BIT && Pmode != word_mode)
            fputs ("addr32 ", file);
          return;

        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
          && GET_MODE (x) != BLKmode)
        {
          const char *size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "TBYTE"; break;
            case 16:
              if (GET_MODE (x) == XFmode)
                size = "TBYTE";
              else
                size = "XMMWORD";
              break;
            case 32: size = "YMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w', 'k',
             'q' and 'x')  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";
          else if (code == 'q')
            size = "QWORD";
          else if (code == 'x')
            size = "XMMWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && !CONST_INT_P (x))
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
        fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
        fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr),
                       0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P' && code != 'p')
        {
          if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (CONST_INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
          || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
				    tmp, GET_MODE (tmp), 0);
    }

  if (parts.index && GET_CODE (parts.index) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
				     tmp, GET_MODE (tmp), 0);
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names for zero-extended
	 addresses to force addr32 prefix.  */
      if (TARGET_64BIT
	  && (GET_CODE (addr) == ZERO_EXTEND
	      || GET_CODE (addr) == AND))
	{
	  gcc_assert (!code);
	  code = 'l';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
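
/* For illustration (an assumed example, not part of the original
   source): for a load with base %rbx, index %rcx, scale 4 and
   displacement 8, the two dialects handled above would print
     AT&T:   8(%rbx,%rcx,4)
     Intel:  [rbx+rcx*4+8]
   with the displacement emitted before the parenthesized group in
   AT&T syntax and folded into the bracketed group in Intel syntax.  */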
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
	       || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ;	/* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
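
/* For reference (a worked example, not part of the original source):
   bits 10-11 of the x87 control word select the rounding mode, so the
   masks used above decompose as

     cw | 0x0c00              -> RC = 11  round toward zero (trunc)
     (cw & ~0x0c00) | 0x0400  -> RC = 01  round down (floor)
     (cw & ~0x0c00) | 0x0800  -> RC = 10  round up (ceil)
     cw | 0x0020              -> PM = 1   mask precision exceptions  */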
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
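
/* Worked example for the encoding above (illustrative, not from the
   original source): fucomi on a dying stack top has eflags_p = 1, a
   non-integer operand, unordered_p = 1 and stack_top_dies = 1, so
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */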
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
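
/* For illustration (not from the original source): on most targets
   the optimize-for-speed path above clears a register with
	xor	%eax, %eax	; 2 bytes, breaks dependency chains
   (with the flags clobber recorded) rather than
	mov	$0, %eax	; 5 bytes, leaves flags intact
   which is why the parallel with the FLAGS_REG clobber is built.  */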
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg; movlps mem, reg; movhps mem+8, reg
     else
       movlps mem, reg; movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     if (x86_sse_split_regs == true)
       movlpd mem, reg; movhpd mem+8, reg
     else
       movsd mem, reg; unpcklpd reg, reg  */
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  switch (GET_MODE_SIZE (mode))
	    {
	    case 16:
	      /* If we're optimizing for size, movups is the smallest.  */
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      op0 = gen_lowpart (V16QImode, op0);
	      op1 = gen_lowpart (V16QImode, op1);
	      emit_insn (gen_sse2_movdqu (op0, op1));
	      break;
	    case 32:
	      op0 = gen_lowpart (V32QImode, op0);
	      op1 = gen_lowpart (V32QImode, op1);
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;
	case MODE_VECTOR_FLOAT:
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);

	  switch (mode)
	    {
	    case V4SFmode:
	      emit_insn (gen_sse_movups (op0, op1));
	      break;
	    case V8SFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    case V2DFmode:
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      emit_insn (gen_sse2_movupd (op0, op1));
	      break;
	    case V4DFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear
	 because of typeless stores.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    emit_insn (gen_sse2_movupd (op0, op1));
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
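
/* For illustration (assumed operands, not from the original source):
   for PLUS in SImode with dst = (reg 1), src1 = (const_int 4) and
   src2 = (reg 1), the rules above request a swap: dst matches src2,
   so the add becomes dst = dst + 4 and can use a matching-operand
   form instead of reloading the constant first.  */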
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;

  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
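
/* Sketch of the sequence emitted above (illustrative register choices,
   not from the original source), for a 32-bit unsigned division:

	mov	%eax, %ecx	; scratch = dividend
	or	%ebx, %ecx	; fold in the divisor
	test	$-0x100, %ecx	; any bit above the low 8 set?
	je	.Lqimode
	div	%ebx		; full 32-bit divide
	jmp	.Lend
   .Lqimode:
	divb	%bl		; 8-bit divide: AL = quotient, AH = remainder
	movzbl	%al, %eax
   .Lend:  */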
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
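
/* Worked example (illustrative, not from the original source): with
   LEA_MAX_STALL = 3 the search threshold is 6 half-cycles.  Scanning
   two independent insns from distance 0 gives 0 -> 1 -> 2; if the
   second insn instead reads a register the first one defines, the
   count rounds up to the next full cycle: 1 -> 1 + (1 & 1) + 2 = 4.  */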
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   The function sets *FOUND to true if a definition was found
   and to false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address, added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}

/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non-AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non-lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non-lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
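
/* For illustration (assumed distances, not from the original source):
   with a non-AGU definition 1 half-insn away (dist_define = 1) and an
   address use 2 away (dist_use = 2), splitting with cost 1 gives
   dist_define + 1 + IX86_LEA_PRIORITY = 2 >= dist_use, so the lea is
   kept; had the use been further away, the split would win.  */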
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with a non-destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if a non-legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if the non-destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
	parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
	parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, operands[0],
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	      tmp = parts.index;
	    }

	  ix86_emit_binop (PLUS, mode, operands[0], tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
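
/* Worked example (illustrative registers, not from the original
   source): on a target where the cost model asks for a split,
	lea	0x4(%ebx,%ecx,4), %eax
   becomes the ALU-only sequence
	mov	%ecx, %eax	; move index into dest
	shl	$2, %eax	; scale by 4 via shift
	add	%ebx, %eax	; add base
	add	$0x4, %eax	; add displacement  */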
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}

/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
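
/* Sketch of the idea above (illustrative, not from the original source):
   the unsigned conversion is built from the signed cvtt* instruction,
   which only covers [0, 2^31).  Inputs of 2^31 or more first have 2^31
   subtracted in the FP domain (the LE mask selects zero_or_two31), and
   the same mask, shifted into the integer sign-bit position, is xored
   back in afterwards, re-adding 2^31 in the integer domain.  */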

/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
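
/* Worked example of the bias trick (illustrative, not in the original
   comments): for input 0x00000001_00000002, the low word juxtaposed
   with 0x43300000 is the double 0x1.0p52 + 2, and the high word
   juxtaposed with 0x45300000 is 0x1.0p84 + 1 * 0x1.0p32.  Subtracting
   the 0x1.0p52 and 0x1.0p84 biases leaves 2.0 and 4294967296.0, whose
   sum 4294967298.0 is exactly the original unsigned 64-bit value.  */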

/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
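
/* The identity used above (illustrative): for any unsigned 32-bit x,
   (double) (int) (x - 0x80000000) + 0x1.0p31 == (double) x.  The
   subtraction merely rebiases x into the signed range the signed
   floatsidf2 can handle, and DFmode represents every 32-bit integer
   exactly, so no rounding occurs anywhere in the sequence.  */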

/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
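
/* The split used above (illustrative): x == (x >> 16) * 0x1.0p16
   + (x & 0xffff).  Each 16-bit half converts to SFmode exactly, and
   the high product is exact as well, so only the final addition
   rounds -- giving the correctly rounded SFmode result for the full
   32-bit unsigned value.  */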

/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
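
/* Same 16-bit split as the scalar routine above, done elementwise
   (illustrative): each lane computes val = (val >> 16) * 0x1.0p16
   + (val & 0xffff), with both halves converting exactly through the
   signed vector int-to-float conversion.  */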

/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
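
/* In effect (illustrative): lanes below 0x1.0p31 pass through unchanged
   and get a zero entry in *XORP; lanes >= 0x1.0p31 are reduced by
   0x1.0p31 before the signed fix_trunc, and the 0x80000000 entry in
   *XORP puts the dropped top bit back after the integer conversion.  */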

/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
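
/* Resulting masks for DFmode, for reference (illustrative): INVERT
   false yields { 0x8000000000000000, ... }, which isolates the sign
   (used by NEG via XOR and by the copysign expanders); INVERT true
   yields { 0x7fffffffffffffff, ... }, which clears it (used by ABS
   via AND).  */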

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
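
/* I.e. the splitter above computes dest = (sign_source & sign_mask)
   | magnitude (illustrative): the AND keeps only the sign bit of the
   operand tied to DEST, and the IOR merges in the pre-expanded
   magnitude constant, which is skipped entirely when that magnitude
   is +0.0.  */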

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us
	 for the proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  gcc_unreachable ();

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}

/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}

/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
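
/* Key to the bit-twiddling above (a fact about fnstsw, not in the
   original comments): after fnstsw the x87 condition bits land in %ah
   as C0 = 0x01, C2 = 0x04 and C3 = 0x40, with C2 set on unordered
   results.  Hence test $0x45 checks "below, unordered or equal" at
   once, test $0x04 isolates NaN operands, and the and/cmp $0x40
   sequences filter C3 while excluding the IEEE unordered cases.  */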

rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}

/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}

void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand comparison using arithmetic, which is not
	 a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}

      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);
	  HOST_WIDE_INT tmp;

	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}
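
/* The core idiom above, in instructions (illustrative): after a
   carry-based compare, "sbb %eax, %eax" produces 0 or -1 in one
   flag-consuming instruction; the following or/not/and/add steps then
   stretch that 0/-1 into the requested ct/cf pair, so the whole
   conditional move costs no branch at all.  */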

/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}

/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,
						    op_true,
						    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
	{
	case V4SFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvps;
	  break;
	case V2DFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvpd;
	  break;
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_SSE4_1)
	    {
	      gen = gen_sse4_1_pblendvb;
	      dest = gen_lowpart (V16QImode, dest);
	      op_false = gen_lowpart (V16QImode, op_false);
	      op_true = gen_lowpart (V16QImode, op_true);
	      cmp = gen_lowpart (V16QImode, cmp);
	    }
	  break;
	case V8SFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvps256;
	  break;
	case V4DFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvpd256;
	  break;
	case V32QImode:
	case V16HImode:
	case V8SImode:
	case V4DImode:
	  if (TARGET_AVX2)
	    {
	      gen = gen_avx2_pblendvb;
	      dest = gen_lowpart (V32QImode, dest);
	      op_false = gen_lowpart (V32QImode, op_false);
	      op_true = gen_lowpart (V32QImode, op_true);
	      cmp = gen_lowpart (V32QImode, cmp);
	    }
	  break;
	default:
	  break;
	}

      if (gen != NULL)
	emit_insn (gen (dest, op_false, op_true, cmp));
      else
	{
	  rtx t2, t3;

	  op_true = force_reg (mode, op_true);

	  t2 = gen_reg_rtx (mode);
	  if (optimize)
	    t3 = gen_reg_rtx (mode);
	  else
	    t3 = dest;

	  x = gen_rtx_AND (mode, op_true, cmp);
	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));

	  x = gen_rtx_NOT (mode, cmp);
	  x = gen_rtx_AND (mode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));

	  x = gen_rtx_IOR (mode, t3, t2);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
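
/* Without a blend instruction, the fallback above is the classic mask
   select, dest = (cmp & op_true) | (~cmp & op_false), which is why CMP
   must be an all-zeros/all-ones lane mask; the earlier special cases
   just drop the AND or IOR when one arm is the constant 0 or -1.  */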
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
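
/* Illustrative sketch only: the kind of source-level construct this
   expander handles.  Assuming SSE math for a DFmode operand,

     double r = a < b ? x : y;

   is expanded as a mask-producing compare plus the blend sequence in
   ix86_expand_sse_movcc; on the x87 path the same source form uses
   fcmov, and since fcmov can only test the unsigned-style flags, a
   signed comparison is first lowered to setcc plus a NE retest of the
   QImode result, as done above.  */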
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
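
/* Illustrative sketch only: the scalar identities behind the special
   case folded at the top of ix86_expand_int_vcond.  For a 32-bit
   element x,

     x < 0 ? -1 : 0   ==  x >> 31             -- arithmetic shift
     x < 0 ?  1 : 0   ==  (unsigned) x >> 31  -- logical shift

   so the whole vector select can be emitted as one psrad/psrld-style
   shift by (element width - 1) instead of a compare plus a blend.  */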
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can, after preparing suitable
             masks, use vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
                t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_const_mem (maskmode, vt);
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_lowpart (mode, target);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8si (target, mask, op0));
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
              emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
              goto merge_two;
            }
          return;

        case V8SFmode:
          mask = gen_lowpart (V8SFmode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
              emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
              goto merge_two;
            }
          return;

        case V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SFmode);
          mask = gen_lowpart (V4SFmode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (128);
          for (i = 0; i < 32; i++)
            vec[i] = vt2;
          vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1, t3));
              return;
            }

          t4 = gen_reg_rtx (V32QImode);
          /* Similarly to the above one_operand_shuffle code,
             just repeated twice for each operand.  The merge_two:
             code below will merge the two results together.  */
          emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
          emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
          emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
          emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
                                          gen_lowpart (V4DImode, t4),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_iorv32qi3 (t4, t2, t4));
          emit_insn (gen_iorv32qi3 (t3, t1, t3));
          t1 = t4;
          t2 = t3;
          goto merge_two;

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    {
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
    }
  else if (one_operand_shuffle)
    {
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask.  At that point the masking done by
             expand_int_vcond will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      for (i = 0; i < w; i++)
        vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
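
/* Illustrative sketch only: the merge_two strategy in scalar form.
   Both inputs are shuffled with the same control (t1 from op0, t2
   from op1), and element i of the result is picked by the bit in the
   original control that addressed the second input:

     r[i] = (c[i] & w) ? t2[i] : t1[i];   -- w == number of elements

   The vector EQ of (mask & vt) against vt builds that selector, and
   ix86_expand_int_vcond turns it into the final blend.  */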
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp, dest;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
        {
        case V32QImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv16qiv16hi2;
          else
            unpack = gen_avx2_sign_extendv16qiv16hi2;
          halfmode = V16QImode;
          extract
            = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
          break;
        case V16HImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv8hiv8si2;
          else
            unpack = gen_avx2_sign_extendv8hiv8si2;
          halfmode = V8HImode;
          extract
            = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
          break;
        case V8SImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv4siv4di2;
          else
            unpack = gen_avx2_sign_extendv4siv4di2;
          halfmode = V4SImode;
          extract
            = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
          break;
        case V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (GET_MODE_SIZE (imode) == 32)
        {
          tmp = gen_reg_rtx (halfmode);
          emit_insn (extract (tmp, operands[1]));
        }
      else if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (imode);
          emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
                                         gen_lowpart (V1TImode, operands[1]),
                                         GEN_INT (64)));
        }
      else
        tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
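
/* Illustrative sketch only: how the pre-SSE4.1 branch widens
   elements.  For 16-bit to 32-bit sign extension of an element x,

     int wide = ((unsigned short) x) | ((x < 0 ? -1 : 0) << 16);

   The GT compare of zero against operands[1] yields that
   all-ones-if-negative value per element, and the punpckl/punpckh
   interleave pairs each element with it; zero extension interleaves
   with a cleared register instead.  */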
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode:
          insn = gen_subqi3_carry;
          break;
        case HImode:
          insn = gen_subhi3_carry;
          break;
        case SImode:
          insn = gen_subsi3_carry;
          break;
        case DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode:
          insn = gen_addqi3_carry;
          break;
        case HImode:
          insn = gen_addhi3_carry;
          break;
        case SImode:
          insn = gen_addsi3_carry;
          break;
        case DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
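
/* Illustrative sketch only: the source shape this helper matches.
   For an unsigned comparison that sets the carry flag,

     if (a < b)
       x++;

   can be emitted as "cmp; adc $0" -- the carry feeds the
   add-with-carry directly, with no setcc and no branch; the decrement
   variant pairs the (possibly reversed) condition with sbb, which is
   why VAL above is flipped to -1 when the condition is reversed.  */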
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Operands 2-4 contain the input values in the correct order; operands
   5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], word_mode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts[1] || collisionparts[2]))
        {
          if (collisionparts[1])
            {
              tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
              tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
            }
          else
            {
              tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
              tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (ix86_gen_add3 (stack_pointer_rtx,
                                          stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these come from an attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
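
/* Illustrative sketch only: why the ordering logic above exists.
   Splitting the 64-bit load "(%eax) -> %eax:%edx" into two 32-bit
   moves must not clobber the address register before the second load,
   so the colliding half goes last:

     movl 4(%eax), %edx     -- safe: %eax still holds the address
     movl (%eax), %eax      -- the overlapping move comes last

   With more than one collision no ordering works, and the code falls
   back to loading the address into the destination's last part with
   an lea first.  */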
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  Note that MODE here is the mode of
   the double-word value being split, so the halves are shifted in the
   half-width mode.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
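
/* Illustrative sketch only: a left shift by a small constant as a
   chain of self-adds, since adding a register to itself doubles it:

     addl %eax, %eax        -- x <<= 1
     addl %eax, %eax        -- x <<= 1 again; two adds give x << 2

   The cost test above (count * add cost vs. shift_const cost) picks
   the add chain only when the active tuning makes it cheaper.  */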
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
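
/* Illustrative sketch only: the variable double-word left shift
   emitted above, for a 64-bit value in lo/hi halves on a 32-bit
   target (count masking and the count == 0 corner ignored here):

     hi = (hi << n) | (lo >> (32 - n));   -- shld
     lo = lo << n;                        -- shl
     if (n & 32)                          -- shift_adj step
       { hi = lo; lo = 0; }

   The shld/shl pair is correct for counts 0..31; the adjustment for
   counts 32..63 uses cmov when a scratch register is available and a
   short branch otherwise.  */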
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}

/* Helper function for the string operations below.  Test VARIABLE for
   whether it is aligned to VALUE bytes.  If so, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                {
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
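
/* Illustrative sketch only: the control flow generated above,
   assuming a word-sized chunk unrolled twice:

     size = count & ~(2 * sizeof (word) - 1);
     for (iter = 0; iter < size; iter += 2 * sizeof (word))
       {
         dst[iter] = src[iter];                  -- chunk 0
         dst[iter + sizeof (word)] = src[iter + sizeof (word)];
       }
     dst += size;  src += size;   -- leave pointers for the epilogue

   In the copy case all loads are issued into tmpreg[] before the
   stores, so chips that reorder memory operations see the streams as
   independent.  */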
/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}
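
/* Illustrative sketch only: the semantics of the rep_mov pattern
   emitted above.  With the byte count scaled down to whole chunks,

     while (chunks--)
       *dst++ = *src++;     -- one MODE-sized chunk per iteration

   i.e. a "rep movs{b,d,q}".  DESTEXP and SRCEXP spell out the final
   pointer values (ptr + chunks * chunk_size) so the RTL optimizers
   can see the side effect of the auto-incremented pointers.  */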
/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}

static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
21548 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21550 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
21551 rtx count
, int max_size
)
21554 expand_simple_binop (counter_mode (count
), AND
, count
,
21555 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
21556 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
21557 gen_lowpart (QImode
, value
), count
, QImode
,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}

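/* Illustration: for a compile-time COUNT the function above emits
   straight-line stores selected by the count's low bits instead of a
   loop.  A standalone C model of that jump-free tail; the name and the
   byte-wise stores are illustrative (the expander emits strset insns
   of the corresponding widths).  */
static void
setmem_tail_model (unsigned char *dest, unsigned int value32,
                   unsigned long countval)
{
  if (countval & 0x04)          /* one SImode-sized store */
    {
      dest[0] = (unsigned char) value32;
      dest[1] = (unsigned char) (value32 >> 8);
      dest[2] = (unsigned char) (value32 >> 16);
      dest[3] = (unsigned char) (value32 >> 24);
      dest += 4;
    }
  if (countval & 0x02)          /* one HImode-sized store */
    {
      dest[0] = (unsigned char) value32;
      dest[1] = (unsigned char) (value32 >> 8);
      dest += 2;
    }
  if (countval & 0x01)          /* one QImode-sized store */
    *dest = (unsigned char) value32;
}
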
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

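/* Illustration: a standalone C model of the prologue above -- peel one
   conditional copy per power of two until the destination reaches the
   desired alignment.  Casting the pointer to unsigned long to test its
   low bits is an assumption of this sketch, not something the expander
   does (it emits ix86_expand_aligntest branches instead).  */
static void
movmem_prologue_model (unsigned char **destp, const unsigned char **srcp,
                       unsigned long *countp, unsigned long desired_alignment)
{
  unsigned long chunk, i;

  for (chunk = 1; chunk < desired_alignment; chunk <<= 1)
    if ((unsigned long) *destp & chunk)
      {
        for (i = 0; i < chunk; i++)
          (*destp)[i] = (*srcp)[i];
        *destp += chunk;
        *srcp += chunk;
        *countp -= chunk;
      }
}
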
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}

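/* Illustration: how much alignment the source can be assumed to have
   once the destination prologue has run.  If DST needs ALIGN_BYTES more
   bytes to reach DESIRED_ALIGN and SRC needs SRC_ALIGN_BYTES, the source
   ends up aligned to the largest power of two on which the two offsets
   agree in their low bits -- the same tests as the chains above.  A
   standalone model; the name is illustrative.  */
static int
src_align_after_prologue_model (int src_align_bytes, int align_bytes,
                                int desired_align)
{
  int src_align = 1;

  if ((src_align_bytes & 1) == (align_bytes & 1))
    src_align = 2;
  if ((src_align_bytes & 3) == (align_bytes & 3))
    src_align = 4;
  if ((src_align_bytes & 7) == (align_bytes & 7))
    src_align = 8;
  if (src_align > desired_align)
    src_align = desired_align;
  return src_align;
}
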
/* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Set enough bytes at DST to align DST, known to be aligned by ALIGN,
   to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}

/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                  \
                           || (alg != rep_prefix_1_byte       \
                               && alg != rep_prefix_4_byte    \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}

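/* Illustration: the per-CPU cost tables consulted above are lists of
   (max size, algorithm) pairs scanned in order; the first entry whose
   max covers the expected size wins, with -1 standing for "unbounded".
   A standalone model with simplified stand-in types (the real tables
   are the stringop_algs structures in the processor cost records):  */
struct alg_entry_model { long max; int alg; };

static int
pick_alg_model (const struct alg_entry_model *table, int nentries,
                long expected_size, int fallback_alg)
{
  int i;

  for (i = 0; i < nentries; i++)
    {
      if (table[i].max == 0)    /* end of meaningful entries */
        break;
      if (table[i].max >= expected_size || table[i].max == -1)
        return table[i].alg;
    }
  return fallback_alg;          /* e.g. a library call */
}
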
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
         copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
         copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}

/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}

/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the block
      is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */
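
/* Illustration: the four steps above, written as a standalone C model.
   CHUNK plays the role of SIZE_NEEDED, the byte loops stand in for the
   prologue/epilogue jump trees, and the guard is simplified to a single
   size test.  */
static void
movmem_shape_model (unsigned char *dst, const unsigned char *src,
                    unsigned long n)
{
  enum { CHUNK = 8, DESIRED_ALIGN = 8 };
  unsigned long i;

  /* 1) Prologue guard: blocks smaller than one chunk go straight to
     the epilogue.  */
  if (n >= CHUNK)
    {
      /* 2) Prologue: peel bytes until DST reaches DESIRED_ALIGN.  */
      while ((unsigned long) dst & (DESIRED_ALIGN - 1))
        {
          *dst++ = *src++;
          n--;
        }
      /* 3) Main body: whole CHUNK-sized iterations.  */
      while (n >= CHUNK)
        {
          for (i = 0; i < CHUNK; i++)
            dst[i] = src[i];
          dst += CHUNK;
          src += CHUNK;
          n -= CHUNK;
        }
    }
  /* 4) Epilogue: the remaining n < CHUNK bytes.  */
  while (n--)
    *dst++ = *src++;
}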
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
         registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode, TARGET_64BIT ? 4 : 2,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
         Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}

/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_movsi_insv_1 (reg, reg));
        else
          emit_insn (gen_movdi_insv_1 (reg, reg));
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}

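/* Illustration: the two strategies the function above chooses between,
   as standalone C on a 32-bit unsigned int -- a single multiply by
   0x01010101, or the shift/or unwinding used when multiplies are slow.
   Both turn byte 0xXY into 0xXYXYXYXY; the names are illustrative.  */
static unsigned int
replicate_byte_mult_model (unsigned char b)
{
  return (unsigned int) b * 0x01010101u;
}

static unsigned int
replicate_byte_shift_model (unsigned char b)
{
  unsigned int v = b;

  v |= v << 8;                  /* 0x0000XYXY */
  v |= v << 16;                 /* 0xXYXYXYXY */
  return v;
}
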
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use the byte
         loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
         Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It only enlarges the program and does not speed
     it up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}

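/* Illustration: the addsi3/one_cmpl/and sequence above is the classic
   "word contains a zero byte" test.  As standalone C, assuming a 32-bit
   unsigned int, the expression below is nonzero iff some byte of X is
   zero: subtracting 0x01010101 flips a byte's high bit only when that
   byte borrows (it was 0), and the ~x mask discards bytes whose high
   bit was already set in X.  */
static unsigned int
word_has_zero_byte_model (unsigned int x)
{
  return (x - 0x01010101u) & ~x & 0x80808080u;
}
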
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}

/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}

rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
        XMM6_REG, XMM7_REG, XMM8_REG,
        XMM9_REG, XMM10_REG, XMM11_REG,
        XMM12_REG, XMM13_REG, XMM14_REG,
        XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
        fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
        vec[vec_len++]
          = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
                             ? TImode : DImode,
                             gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
                                          ? TImode : DImode,
                                          clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
        {
          if (cfun->machine->callee_return_avx256_p)
            avx256 = callee_return_pass_avx256;
          else
            avx256 = callee_pass_avx256;
        }
      else if (cfun->machine->callee_return_avx256_p)
        avx256 = callee_return_avx256;
      else
        avx256 = call_no_avx256;

      if (reload_completed)
        emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
        vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
                                         gen_rtvec (1, GEN_INT (avx256)),
                                         UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}

void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}

/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}

/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
         || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (REG_P (addr)
          && (addr == arg_pointer_rtx
              || addr == frame_pointer_rtx
              || REGNO (addr) == SP_REG
              || REGNO (addr) == BP_REG
              || REGNO (addr) == R12_REG
              || REGNO (addr) == R13_REG))
        len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) != LABEL_REF
              && (GET_CODE (symbol) != SYMBOL_REF
                  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
              && (GET_CODE (symbol) != UNSPEC
                  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
                      && XINT (symbol, 1) != UNSPEC_PCREL
                      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
            len += 1;
        }
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
               && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && REG_P (base)
              && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len += 1;
    }

  return len;
}

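/* Illustration: the core 32-bit ModRM sizing rules applied above,
   reduced to a standalone model.  An index (or esp/r12 as base) costs
   one SIB byte; a displacement costs 1 byte if it fits a signed 8-bit
   field and 4 bytes otherwise; ebp/r13 as base force at least a zero
   disp8.  Register numbers follow the hardware encoding (esp = 4,
   ebp = 5); the name and parameters are illustrative.  */
static int
modrm_tail_len_model (int has_base, int base_regno, int has_index,
                      int has_disp, long disp)
{
  int len = 0;

  if (has_index || (has_base && base_regno == 4))
    len += 1;                   /* SIB byte */
  if (has_disp || (has_base && base_regno == 5))
    {
      if (disp >= -128 && disp <= 127)
        len += 1;               /* disp8 */
      else
        len += 4;               /* disp32 */
    }
  return len;
}
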
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
        {
          if (GET_CODE (addr) == ZERO_EXTEND)
            addr = XEXP (addr, 0);
          if (GET_CODE (addr) == SUBREG)
            addr = SUBREG_REG (addr);
        }

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}

/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}

/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
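/* Illustrative sketch (not part of GCC): the load-latency-hiding adjustment
   in the AMD/Atom/generic branch above, as a hypothetical standalone helper.
   When the producing insn is not needed for address generation, the
   scheduler deducts the pipeline's load-preparation overhead from the
   dependence cost, clamping at zero.  Guarded out of the build.  */
#if 0
static int
example_hide_load_latency (int cost, int loadcost)
{
  return cost >= loadcost ? cost - loadcost : 0;
}
#endif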
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as many instructions can be executed on a cycle, i.e.,
	 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}

/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this insn is too long for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}

/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}

/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}

/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}

/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}

/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}

/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
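/* Illustrative sketch (not part of GCC): a standalone simulation of the
   decoder filter above.  An insn is masked out once adding it would
   overflow the 16-byte ifetch block or the 6-insn decode limit; the insn
   sizes here are made up for the example.  Guarded out of the build.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const int block_size = 16, max_insns = 6;
  const int insn_size[] = { 3, 7, 4, 5, 2 };
  int len = 0, n = 0;
  unsigned i;

  for (i = 0; i < sizeof insn_size / sizeof insn_size[0]; i++)
    {
      if (len + insn_size[i] > block_size || n + 1 > max_insns)
	printf ("insn %u (size %d): masked out\n", i, insn_size[i]);
      else
	{
	  len += insn_size[i];
	  n++;
	  printf ("insn %u (size %d): issued (block %d/%d bytes)\n",
		  i, insn_size[i], len, block_size);
	}
    }
  return 0;
}
#endif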
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
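/* Illustrative sketch (not part of GCC): a much-simplified standalone model
   of the x86-64 rule above -- aggregates of 16 bytes or more get 128-bit
   alignment.  It ignores the tree machinery entirely and works from plain
   sizes; the helper name is hypothetical, for illustration only.  Guarded
   out of the build.  */
#if 0
#include <stdbool.h>

static int
example_data_align_bits (bool is_aggregate, unsigned long size_bytes,
			 int align)
{
  /* e.g. a 32-byte struct with default 32-bit alignment is bumped
     to 128 bits so aligned SSE accesses are possible.  */
  if (is_aggregate && size_bytes >= 16 && align < 128)
    return 128;
  return align;
}
#endif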
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler can not do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function compiled, and
     functions from other units can not rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we can not benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
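/* Illustrative sketch (not part of GCC): the 32-bit static-chain choice
   above, restated as a lookup on the calling convention.  The strings and
   the helper name are hypothetical stand-ins, for illustration only;
   guarded out of the build.  */
#if 0
#include <string.h>

/* Returns the register carrying the chain, or "stack" when regparm(3)
   forces the trampoline to push it (with ESI at the alternate entry).  */
static const char *
example_static_chain_loc (const char *callcvt, int regparm)
{
  if (strcmp (callcvt, "fastcall") == 0 || strcmp (callcvt, "thiscall") == 0)
    return "eax";		/* ecx (and edx) taken by arguments */
  if (regparm == 3)
    return "stack";		/* all call-clobbered registers in use */
  return "ecx";			/* default */
}
#endif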
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
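/* Illustrative sketch (not part of GCC): the byte image the 64-bit movabs
   path above produces, written into a plain buffer.  0x49 0xBB is
   movabs $imm64, %r11; 0x49 0xBA is movabs $imm64, %r10; 0x49 0xFF 0xE3 is
   jmp *%r11; 0x90 is the nop that pads the final 32-bit store.  Assumes a
   little-endian host (true for x86); hypothetical helper, guarded out of
   the build.  BUF must hold at least 24 bytes.  */
#if 0
#include <stdint.h>
#include <string.h>

static size_t
example_fill_tramp64 (uint8_t *buf, uint64_t fnaddr, uint64_t chain)
{
  size_t off = 0;

  buf[off++] = 0x49; buf[off++] = 0xbb;	/* movabs $fnaddr, %r11 */
  memcpy (buf + off, &fnaddr, 8); off += 8;
  buf[off++] = 0x49; buf[off++] = 0xba;	/* movabs $chain, %r10 */
  memcpy (buf + off, &chain, 8); off += 8;
  buf[off++] = 0x49; buf[off++] = 0xff;	/* jmp *%r11 */
  buf[off++] = 0xe3;
  buf[off++] = 0x90;			/* nop pad */
  return off;				/* 24 bytes total */
}
#endif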
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}

/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
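/* Illustrative sketch (not part of GCC): the same build-on-demand
   memoization pattern as the two lookup routines above, applied to a toy
   table of squares.  Zero doubles as the "not built yet" marker, exactly
   like the NULL checks above (entry 0 is its own fixed point, so it is
   harmlessly recomputed).  Hypothetical code; guarded out of the build.  */
#if 0
#define N_CODES 32

static long memo_tab[N_CODES];		/* 0 means "not built yet" */

static long
example_get_lazily (int code)
{
  long v = memo_tab[code];
  if (v != 0)
    return v;			/* already built, just return it */

  v = (long) code * code;	/* build the entry on first use */
  memo_tab[code] = v;
  return v;
}
#endif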
24627 /* Codes for all the SSE/MMX builtins. */
24630 IX86_BUILTIN_ADDPS
,
24631 IX86_BUILTIN_ADDSS
,
24632 IX86_BUILTIN_DIVPS
,
24633 IX86_BUILTIN_DIVSS
,
24634 IX86_BUILTIN_MULPS
,
24635 IX86_BUILTIN_MULSS
,
24636 IX86_BUILTIN_SUBPS
,
24637 IX86_BUILTIN_SUBSS
,
24639 IX86_BUILTIN_CMPEQPS
,
24640 IX86_BUILTIN_CMPLTPS
,
24641 IX86_BUILTIN_CMPLEPS
,
24642 IX86_BUILTIN_CMPGTPS
,
24643 IX86_BUILTIN_CMPGEPS
,
24644 IX86_BUILTIN_CMPNEQPS
,
24645 IX86_BUILTIN_CMPNLTPS
,
24646 IX86_BUILTIN_CMPNLEPS
,
24647 IX86_BUILTIN_CMPNGTPS
,
24648 IX86_BUILTIN_CMPNGEPS
,
24649 IX86_BUILTIN_CMPORDPS
,
24650 IX86_BUILTIN_CMPUNORDPS
,
24651 IX86_BUILTIN_CMPEQSS
,
24652 IX86_BUILTIN_CMPLTSS
,
24653 IX86_BUILTIN_CMPLESS
,
24654 IX86_BUILTIN_CMPNEQSS
,
24655 IX86_BUILTIN_CMPNLTSS
,
24656 IX86_BUILTIN_CMPNLESS
,
24657 IX86_BUILTIN_CMPNGTSS
,
24658 IX86_BUILTIN_CMPNGESS
,
24659 IX86_BUILTIN_CMPORDSS
,
24660 IX86_BUILTIN_CMPUNORDSS
,
24662 IX86_BUILTIN_COMIEQSS
,
24663 IX86_BUILTIN_COMILTSS
,
24664 IX86_BUILTIN_COMILESS
,
24665 IX86_BUILTIN_COMIGTSS
,
24666 IX86_BUILTIN_COMIGESS
,
24667 IX86_BUILTIN_COMINEQSS
,
24668 IX86_BUILTIN_UCOMIEQSS
,
24669 IX86_BUILTIN_UCOMILTSS
,
24670 IX86_BUILTIN_UCOMILESS
,
24671 IX86_BUILTIN_UCOMIGTSS
,
24672 IX86_BUILTIN_UCOMIGESS
,
24673 IX86_BUILTIN_UCOMINEQSS
,
24675 IX86_BUILTIN_CVTPI2PS
,
24676 IX86_BUILTIN_CVTPS2PI
,
24677 IX86_BUILTIN_CVTSI2SS
,
24678 IX86_BUILTIN_CVTSI642SS
,
24679 IX86_BUILTIN_CVTSS2SI
,
24680 IX86_BUILTIN_CVTSS2SI64
,
24681 IX86_BUILTIN_CVTTPS2PI
,
24682 IX86_BUILTIN_CVTTSS2SI
,
24683 IX86_BUILTIN_CVTTSS2SI64
,
24685 IX86_BUILTIN_MAXPS
,
24686 IX86_BUILTIN_MAXSS
,
24687 IX86_BUILTIN_MINPS
,
24688 IX86_BUILTIN_MINSS
,
24690 IX86_BUILTIN_LOADUPS
,
24691 IX86_BUILTIN_STOREUPS
,
24692 IX86_BUILTIN_MOVSS
,
24694 IX86_BUILTIN_MOVHLPS
,
24695 IX86_BUILTIN_MOVLHPS
,
24696 IX86_BUILTIN_LOADHPS
,
24697 IX86_BUILTIN_LOADLPS
,
24698 IX86_BUILTIN_STOREHPS
,
24699 IX86_BUILTIN_STORELPS
,
24701 IX86_BUILTIN_MASKMOVQ
,
24702 IX86_BUILTIN_MOVMSKPS
,
24703 IX86_BUILTIN_PMOVMSKB
,
24705 IX86_BUILTIN_MOVNTPS
,
24706 IX86_BUILTIN_MOVNTQ
,
24708 IX86_BUILTIN_LOADDQU
,
24709 IX86_BUILTIN_STOREDQU
,
24711 IX86_BUILTIN_PACKSSWB
,
24712 IX86_BUILTIN_PACKSSDW
,
24713 IX86_BUILTIN_PACKUSWB
,
24715 IX86_BUILTIN_PADDB
,
24716 IX86_BUILTIN_PADDW
,
24717 IX86_BUILTIN_PADDD
,
24718 IX86_BUILTIN_PADDQ
,
24719 IX86_BUILTIN_PADDSB
,
24720 IX86_BUILTIN_PADDSW
,
24721 IX86_BUILTIN_PADDUSB
,
24722 IX86_BUILTIN_PADDUSW
,
24723 IX86_BUILTIN_PSUBB
,
24724 IX86_BUILTIN_PSUBW
,
24725 IX86_BUILTIN_PSUBD
,
24726 IX86_BUILTIN_PSUBQ
,
24727 IX86_BUILTIN_PSUBSB
,
24728 IX86_BUILTIN_PSUBSW
,
24729 IX86_BUILTIN_PSUBUSB
,
24730 IX86_BUILTIN_PSUBUSW
,
24733 IX86_BUILTIN_PANDN
,
24737 IX86_BUILTIN_PAVGB
,
24738 IX86_BUILTIN_PAVGW
,
24740 IX86_BUILTIN_PCMPEQB
,
24741 IX86_BUILTIN_PCMPEQW
,
24742 IX86_BUILTIN_PCMPEQD
,
24743 IX86_BUILTIN_PCMPGTB
,
24744 IX86_BUILTIN_PCMPGTW
,
24745 IX86_BUILTIN_PCMPGTD
,
24747 IX86_BUILTIN_PMADDWD
,
24749 IX86_BUILTIN_PMAXSW
,
24750 IX86_BUILTIN_PMAXUB
,
24751 IX86_BUILTIN_PMINSW
,
24752 IX86_BUILTIN_PMINUB
,
24754 IX86_BUILTIN_PMULHUW
,
24755 IX86_BUILTIN_PMULHW
,
24756 IX86_BUILTIN_PMULLW
,
24758 IX86_BUILTIN_PSADBW
,
24759 IX86_BUILTIN_PSHUFW
,
24761 IX86_BUILTIN_PSLLW
,
24762 IX86_BUILTIN_PSLLD
,
24763 IX86_BUILTIN_PSLLQ
,
24764 IX86_BUILTIN_PSRAW
,
24765 IX86_BUILTIN_PSRAD
,
24766 IX86_BUILTIN_PSRLW
,
24767 IX86_BUILTIN_PSRLD
,
24768 IX86_BUILTIN_PSRLQ
,
24769 IX86_BUILTIN_PSLLWI
,
24770 IX86_BUILTIN_PSLLDI
,
24771 IX86_BUILTIN_PSLLQI
,
24772 IX86_BUILTIN_PSRAWI
,
24773 IX86_BUILTIN_PSRADI
,
24774 IX86_BUILTIN_PSRLWI
,
24775 IX86_BUILTIN_PSRLDI
,
24776 IX86_BUILTIN_PSRLQI
,
24778 IX86_BUILTIN_PUNPCKHBW
,
24779 IX86_BUILTIN_PUNPCKHWD
,
24780 IX86_BUILTIN_PUNPCKHDQ
,
24781 IX86_BUILTIN_PUNPCKLBW
,
24782 IX86_BUILTIN_PUNPCKLWD
,
24783 IX86_BUILTIN_PUNPCKLDQ
,
24785 IX86_BUILTIN_SHUFPS
,
24787 IX86_BUILTIN_RCPPS
,
24788 IX86_BUILTIN_RCPSS
,
24789 IX86_BUILTIN_RSQRTPS
,
24790 IX86_BUILTIN_RSQRTPS_NR
,
24791 IX86_BUILTIN_RSQRTSS
,
24792 IX86_BUILTIN_RSQRTF
,
24793 IX86_BUILTIN_SQRTPS
,
24794 IX86_BUILTIN_SQRTPS_NR
,
24795 IX86_BUILTIN_SQRTSS
,
24797 IX86_BUILTIN_UNPCKHPS
,
24798 IX86_BUILTIN_UNPCKLPS
,
24800 IX86_BUILTIN_ANDPS
,
24801 IX86_BUILTIN_ANDNPS
,
24803 IX86_BUILTIN_XORPS
,
24806 IX86_BUILTIN_LDMXCSR
,
24807 IX86_BUILTIN_STMXCSR
,
24808 IX86_BUILTIN_SFENCE
,
24810 /* 3DNow! Original */
24811 IX86_BUILTIN_FEMMS
,
24812 IX86_BUILTIN_PAVGUSB
,
24813 IX86_BUILTIN_PF2ID
,
24814 IX86_BUILTIN_PFACC
,
24815 IX86_BUILTIN_PFADD
,
24816 IX86_BUILTIN_PFCMPEQ
,
24817 IX86_BUILTIN_PFCMPGE
,
24818 IX86_BUILTIN_PFCMPGT
,
24819 IX86_BUILTIN_PFMAX
,
24820 IX86_BUILTIN_PFMIN
,
24821 IX86_BUILTIN_PFMUL
,
24822 IX86_BUILTIN_PFRCP
,
24823 IX86_BUILTIN_PFRCPIT1
,
24824 IX86_BUILTIN_PFRCPIT2
,
24825 IX86_BUILTIN_PFRSQIT1
,
24826 IX86_BUILTIN_PFRSQRT
,
24827 IX86_BUILTIN_PFSUB
,
24828 IX86_BUILTIN_PFSUBR
,
24829 IX86_BUILTIN_PI2FD
,
24830 IX86_BUILTIN_PMULHRW
,
24832 /* 3DNow! Athlon Extensions */
24833 IX86_BUILTIN_PF2IW
,
24834 IX86_BUILTIN_PFNACC
,
24835 IX86_BUILTIN_PFPNACC
,
24836 IX86_BUILTIN_PI2FW
,
24837 IX86_BUILTIN_PSWAPDSI
,
24838 IX86_BUILTIN_PSWAPDSF
,
24841 IX86_BUILTIN_ADDPD
,
24842 IX86_BUILTIN_ADDSD
,
24843 IX86_BUILTIN_DIVPD
,
24844 IX86_BUILTIN_DIVSD
,
24845 IX86_BUILTIN_MULPD
,
24846 IX86_BUILTIN_MULSD
,
24847 IX86_BUILTIN_SUBPD
,
24848 IX86_BUILTIN_SUBSD
,
24850 IX86_BUILTIN_CMPEQPD
,
24851 IX86_BUILTIN_CMPLTPD
,
24852 IX86_BUILTIN_CMPLEPD
,
24853 IX86_BUILTIN_CMPGTPD
,
24854 IX86_BUILTIN_CMPGEPD
,
24855 IX86_BUILTIN_CMPNEQPD
,
24856 IX86_BUILTIN_CMPNLTPD
,
24857 IX86_BUILTIN_CMPNLEPD
,
24858 IX86_BUILTIN_CMPNGTPD
,
24859 IX86_BUILTIN_CMPNGEPD
,
24860 IX86_BUILTIN_CMPORDPD
,
24861 IX86_BUILTIN_CMPUNORDPD
,
24862 IX86_BUILTIN_CMPEQSD
,
24863 IX86_BUILTIN_CMPLTSD
,
24864 IX86_BUILTIN_CMPLESD
,
24865 IX86_BUILTIN_CMPNEQSD
,
24866 IX86_BUILTIN_CMPNLTSD
,
24867 IX86_BUILTIN_CMPNLESD
,
24868 IX86_BUILTIN_CMPORDSD
,
24869 IX86_BUILTIN_CMPUNORDSD
,
24871 IX86_BUILTIN_COMIEQSD
,
24872 IX86_BUILTIN_COMILTSD
,
24873 IX86_BUILTIN_COMILESD
,
24874 IX86_BUILTIN_COMIGTSD
,
24875 IX86_BUILTIN_COMIGESD
,
24876 IX86_BUILTIN_COMINEQSD
,
24877 IX86_BUILTIN_UCOMIEQSD
,
24878 IX86_BUILTIN_UCOMILTSD
,
24879 IX86_BUILTIN_UCOMILESD
,
24880 IX86_BUILTIN_UCOMIGTSD
,
24881 IX86_BUILTIN_UCOMIGESD
,
24882 IX86_BUILTIN_UCOMINEQSD
,
24884 IX86_BUILTIN_MAXPD
,
24885 IX86_BUILTIN_MAXSD
,
24886 IX86_BUILTIN_MINPD
,
24887 IX86_BUILTIN_MINSD
,
24889 IX86_BUILTIN_ANDPD
,
24890 IX86_BUILTIN_ANDNPD
,
24892 IX86_BUILTIN_XORPD
,
24894 IX86_BUILTIN_SQRTPD
,
24895 IX86_BUILTIN_SQRTSD
,
24897 IX86_BUILTIN_UNPCKHPD
,
24898 IX86_BUILTIN_UNPCKLPD
,
24900 IX86_BUILTIN_SHUFPD
,
24902 IX86_BUILTIN_LOADUPD
,
24903 IX86_BUILTIN_STOREUPD
,
24904 IX86_BUILTIN_MOVSD
,
24906 IX86_BUILTIN_LOADHPD
,
24907 IX86_BUILTIN_LOADLPD
,
24909 IX86_BUILTIN_CVTDQ2PD
,
24910 IX86_BUILTIN_CVTDQ2PS
,
24912 IX86_BUILTIN_CVTPD2DQ
,
24913 IX86_BUILTIN_CVTPD2PI
,
24914 IX86_BUILTIN_CVTPD2PS
,
24915 IX86_BUILTIN_CVTTPD2DQ
,
24916 IX86_BUILTIN_CVTTPD2PI
,
24918 IX86_BUILTIN_CVTPI2PD
,
24919 IX86_BUILTIN_CVTSI2SD
,
24920 IX86_BUILTIN_CVTSI642SD
,
24922 IX86_BUILTIN_CVTSD2SI
,
24923 IX86_BUILTIN_CVTSD2SI64
,
24924 IX86_BUILTIN_CVTSD2SS
,
24925 IX86_BUILTIN_CVTSS2SD
,
24926 IX86_BUILTIN_CVTTSD2SI
,
24927 IX86_BUILTIN_CVTTSD2SI64
,
24929 IX86_BUILTIN_CVTPS2DQ
,
24930 IX86_BUILTIN_CVTPS2PD
,
24931 IX86_BUILTIN_CVTTPS2DQ
,
24933 IX86_BUILTIN_MOVNTI
,
24934 IX86_BUILTIN_MOVNTI64
,
24935 IX86_BUILTIN_MOVNTPD
,
24936 IX86_BUILTIN_MOVNTDQ
,
24938 IX86_BUILTIN_MOVQ128
,
24941 IX86_BUILTIN_MASKMOVDQU
,
24942 IX86_BUILTIN_MOVMSKPD
,
24943 IX86_BUILTIN_PMOVMSKB128
,
24945 IX86_BUILTIN_PACKSSWB128
,
24946 IX86_BUILTIN_PACKSSDW128
,
24947 IX86_BUILTIN_PACKUSWB128
,
24949 IX86_BUILTIN_PADDB128
,
24950 IX86_BUILTIN_PADDW128
,
24951 IX86_BUILTIN_PADDD128
,
24952 IX86_BUILTIN_PADDQ128
,
24953 IX86_BUILTIN_PADDSB128
,
24954 IX86_BUILTIN_PADDSW128
,
24955 IX86_BUILTIN_PADDUSB128
,
24956 IX86_BUILTIN_PADDUSW128
,
24957 IX86_BUILTIN_PSUBB128
,
24958 IX86_BUILTIN_PSUBW128
,
24959 IX86_BUILTIN_PSUBD128
,
24960 IX86_BUILTIN_PSUBQ128
,
24961 IX86_BUILTIN_PSUBSB128
,
24962 IX86_BUILTIN_PSUBSW128
,
24963 IX86_BUILTIN_PSUBUSB128
,
24964 IX86_BUILTIN_PSUBUSW128
,
24966 IX86_BUILTIN_PAND128
,
24967 IX86_BUILTIN_PANDN128
,
24968 IX86_BUILTIN_POR128
,
24969 IX86_BUILTIN_PXOR128
,
24971 IX86_BUILTIN_PAVGB128
,
24972 IX86_BUILTIN_PAVGW128
,
24974 IX86_BUILTIN_PCMPEQB128
,
24975 IX86_BUILTIN_PCMPEQW128
,
24976 IX86_BUILTIN_PCMPEQD128
,
24977 IX86_BUILTIN_PCMPGTB128
,
24978 IX86_BUILTIN_PCMPGTW128
,
24979 IX86_BUILTIN_PCMPGTD128
,
24981 IX86_BUILTIN_PMADDWD128
,
24983 IX86_BUILTIN_PMAXSW128
,
24984 IX86_BUILTIN_PMAXUB128
,
24985 IX86_BUILTIN_PMINSW128
,
24986 IX86_BUILTIN_PMINUB128
,
24988 IX86_BUILTIN_PMULUDQ
,
24989 IX86_BUILTIN_PMULUDQ128
,
24990 IX86_BUILTIN_PMULHUW128
,
24991 IX86_BUILTIN_PMULHW128
,
24992 IX86_BUILTIN_PMULLW128
,
24994 IX86_BUILTIN_PSADBW128
,
24995 IX86_BUILTIN_PSHUFHW
,
24996 IX86_BUILTIN_PSHUFLW
,
24997 IX86_BUILTIN_PSHUFD
,
24999 IX86_BUILTIN_PSLLDQI128
,
25000 IX86_BUILTIN_PSLLWI128
,
25001 IX86_BUILTIN_PSLLDI128
,
25002 IX86_BUILTIN_PSLLQI128
,
25003 IX86_BUILTIN_PSRAWI128
,
25004 IX86_BUILTIN_PSRADI128
,
25005 IX86_BUILTIN_PSRLDQI128
,
25006 IX86_BUILTIN_PSRLWI128
,
25007 IX86_BUILTIN_PSRLDI128
,
25008 IX86_BUILTIN_PSRLQI128
,
25010 IX86_BUILTIN_PSLLDQ128
,
25011 IX86_BUILTIN_PSLLW128
,
25012 IX86_BUILTIN_PSLLD128
,
25013 IX86_BUILTIN_PSLLQ128
,
25014 IX86_BUILTIN_PSRAW128
,
25015 IX86_BUILTIN_PSRAD128
,
25016 IX86_BUILTIN_PSRLW128
,
25017 IX86_BUILTIN_PSRLD128
,
25018 IX86_BUILTIN_PSRLQ128
,
25020 IX86_BUILTIN_PUNPCKHBW128
,
25021 IX86_BUILTIN_PUNPCKHWD128
,
25022 IX86_BUILTIN_PUNPCKHDQ128
,
25023 IX86_BUILTIN_PUNPCKHQDQ128
,
25024 IX86_BUILTIN_PUNPCKLBW128
,
25025 IX86_BUILTIN_PUNPCKLWD128
,
25026 IX86_BUILTIN_PUNPCKLDQ128
,
25027 IX86_BUILTIN_PUNPCKLQDQ128
,
25029 IX86_BUILTIN_CLFLUSH
,
25030 IX86_BUILTIN_MFENCE
,
25031 IX86_BUILTIN_LFENCE
,
25032 IX86_BUILTIN_PAUSE
,
25034 IX86_BUILTIN_BSRSI
,
25035 IX86_BUILTIN_BSRDI
,
25036 IX86_BUILTIN_RDPMC
,
25037 IX86_BUILTIN_RDTSC
,
25038 IX86_BUILTIN_RDTSCP
,
25039 IX86_BUILTIN_ROLQI
,
25040 IX86_BUILTIN_ROLHI
,
25041 IX86_BUILTIN_RORQI
,
25042 IX86_BUILTIN_RORHI
,
25045 IX86_BUILTIN_ADDSUBPS
,
25046 IX86_BUILTIN_HADDPS
,
25047 IX86_BUILTIN_HSUBPS
,
25048 IX86_BUILTIN_MOVSHDUP
,
25049 IX86_BUILTIN_MOVSLDUP
,
25050 IX86_BUILTIN_ADDSUBPD
,
25051 IX86_BUILTIN_HADDPD
,
25052 IX86_BUILTIN_HSUBPD
,
25053 IX86_BUILTIN_LDDQU
,
25055 IX86_BUILTIN_MONITOR
,
25056 IX86_BUILTIN_MWAIT
,
25059 IX86_BUILTIN_PHADDW
,
25060 IX86_BUILTIN_PHADDD
,
25061 IX86_BUILTIN_PHADDSW
,
25062 IX86_BUILTIN_PHSUBW
,
25063 IX86_BUILTIN_PHSUBD
,
25064 IX86_BUILTIN_PHSUBSW
,
25065 IX86_BUILTIN_PMADDUBSW
,
25066 IX86_BUILTIN_PMULHRSW
,
25067 IX86_BUILTIN_PSHUFB
,
25068 IX86_BUILTIN_PSIGNB
,
25069 IX86_BUILTIN_PSIGNW
,
25070 IX86_BUILTIN_PSIGND
,
25071 IX86_BUILTIN_PALIGNR
,
25072 IX86_BUILTIN_PABSB
,
25073 IX86_BUILTIN_PABSW
,
25074 IX86_BUILTIN_PABSD
,
25076 IX86_BUILTIN_PHADDW128
,
25077 IX86_BUILTIN_PHADDD128
,
25078 IX86_BUILTIN_PHADDSW128
,
25079 IX86_BUILTIN_PHSUBW128
,
25080 IX86_BUILTIN_PHSUBD128
,
25081 IX86_BUILTIN_PHSUBSW128
,
25082 IX86_BUILTIN_PMADDUBSW128
,
25083 IX86_BUILTIN_PMULHRSW128
,
25084 IX86_BUILTIN_PSHUFB128
,
25085 IX86_BUILTIN_PSIGNB128
,
25086 IX86_BUILTIN_PSIGNW128
,
25087 IX86_BUILTIN_PSIGND128
,
25088 IX86_BUILTIN_PALIGNR128
,
25089 IX86_BUILTIN_PABSB128
,
25090 IX86_BUILTIN_PABSW128
,
25091 IX86_BUILTIN_PABSD128
,
25093 /* AMDFAM10 - SSE4A New Instructions. */
25094 IX86_BUILTIN_MOVNTSD
,
25095 IX86_BUILTIN_MOVNTSS
,
25096 IX86_BUILTIN_EXTRQI
,
25097 IX86_BUILTIN_EXTRQ
,
25098 IX86_BUILTIN_INSERTQI
,
25099 IX86_BUILTIN_INSERTQ
,
25102 IX86_BUILTIN_BLENDPD
,
25103 IX86_BUILTIN_BLENDPS
,
25104 IX86_BUILTIN_BLENDVPD
,
25105 IX86_BUILTIN_BLENDVPS
,
25106 IX86_BUILTIN_PBLENDVB128
,
25107 IX86_BUILTIN_PBLENDW128
,
25112 IX86_BUILTIN_INSERTPS128
,
25114 IX86_BUILTIN_MOVNTDQA
,
25115 IX86_BUILTIN_MPSADBW128
,
25116 IX86_BUILTIN_PACKUSDW128
,
25117 IX86_BUILTIN_PCMPEQQ
,
25118 IX86_BUILTIN_PHMINPOSUW128
,
25120 IX86_BUILTIN_PMAXSB128
,
25121 IX86_BUILTIN_PMAXSD128
,
25122 IX86_BUILTIN_PMAXUD128
,
25123 IX86_BUILTIN_PMAXUW128
,
25125 IX86_BUILTIN_PMINSB128
,
25126 IX86_BUILTIN_PMINSD128
,
25127 IX86_BUILTIN_PMINUD128
,
25128 IX86_BUILTIN_PMINUW128
,
25130 IX86_BUILTIN_PMOVSXBW128
,
25131 IX86_BUILTIN_PMOVSXBD128
,
25132 IX86_BUILTIN_PMOVSXBQ128
,
25133 IX86_BUILTIN_PMOVSXWD128
,
25134 IX86_BUILTIN_PMOVSXWQ128
,
25135 IX86_BUILTIN_PMOVSXDQ128
,
25137 IX86_BUILTIN_PMOVZXBW128
,
25138 IX86_BUILTIN_PMOVZXBD128
,
25139 IX86_BUILTIN_PMOVZXBQ128
,
25140 IX86_BUILTIN_PMOVZXWD128
,
25141 IX86_BUILTIN_PMOVZXWQ128
,
25142 IX86_BUILTIN_PMOVZXDQ128
,
25144 IX86_BUILTIN_PMULDQ128
,
25145 IX86_BUILTIN_PMULLD128
,
25147 IX86_BUILTIN_ROUNDSD
,
25148 IX86_BUILTIN_ROUNDSS
,
25150 IX86_BUILTIN_ROUNDPD
,
25151 IX86_BUILTIN_ROUNDPS
,
25153 IX86_BUILTIN_FLOORPD
,
25154 IX86_BUILTIN_CEILPD
,
25155 IX86_BUILTIN_TRUNCPD
,
25156 IX86_BUILTIN_RINTPD
,
25157 IX86_BUILTIN_ROUNDPD_AZ
,
25159 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25160 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25161 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25163 IX86_BUILTIN_FLOORPS
,
25164 IX86_BUILTIN_CEILPS
,
25165 IX86_BUILTIN_TRUNCPS
,
25166 IX86_BUILTIN_RINTPS
,
25167 IX86_BUILTIN_ROUNDPS_AZ
,
25169 IX86_BUILTIN_FLOORPS_SFIX
,
25170 IX86_BUILTIN_CEILPS_SFIX
,
25171 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25173 IX86_BUILTIN_PTESTZ
,
25174 IX86_BUILTIN_PTESTC
,
25175 IX86_BUILTIN_PTESTNZC
,
25177 IX86_BUILTIN_VEC_INIT_V2SI
,
25178 IX86_BUILTIN_VEC_INIT_V4HI
,
25179 IX86_BUILTIN_VEC_INIT_V8QI
,
25180 IX86_BUILTIN_VEC_EXT_V2DF
,
25181 IX86_BUILTIN_VEC_EXT_V2DI
,
25182 IX86_BUILTIN_VEC_EXT_V4SF
,
25183 IX86_BUILTIN_VEC_EXT_V4SI
,
25184 IX86_BUILTIN_VEC_EXT_V8HI
,
25185 IX86_BUILTIN_VEC_EXT_V2SI
,
25186 IX86_BUILTIN_VEC_EXT_V4HI
,
25187 IX86_BUILTIN_VEC_EXT_V16QI
,
25188 IX86_BUILTIN_VEC_SET_V2DI
,
25189 IX86_BUILTIN_VEC_SET_V4SF
,
25190 IX86_BUILTIN_VEC_SET_V4SI
,
25191 IX86_BUILTIN_VEC_SET_V8HI
,
25192 IX86_BUILTIN_VEC_SET_V4HI
,
25193 IX86_BUILTIN_VEC_SET_V16QI
,
25195 IX86_BUILTIN_VEC_PACK_SFIX
,
25196 IX86_BUILTIN_VEC_PACK_SFIX256
,
25199 IX86_BUILTIN_CRC32QI
,
25200 IX86_BUILTIN_CRC32HI
,
25201 IX86_BUILTIN_CRC32SI
,
25202 IX86_BUILTIN_CRC32DI
,
25204 IX86_BUILTIN_PCMPESTRI128
,
25205 IX86_BUILTIN_PCMPESTRM128
,
25206 IX86_BUILTIN_PCMPESTRA128
,
25207 IX86_BUILTIN_PCMPESTRC128
,
25208 IX86_BUILTIN_PCMPESTRO128
,
25209 IX86_BUILTIN_PCMPESTRS128
,
25210 IX86_BUILTIN_PCMPESTRZ128
,
25211 IX86_BUILTIN_PCMPISTRI128
,
25212 IX86_BUILTIN_PCMPISTRM128
,
25213 IX86_BUILTIN_PCMPISTRA128
,
25214 IX86_BUILTIN_PCMPISTRC128
,
25215 IX86_BUILTIN_PCMPISTRO128
,
25216 IX86_BUILTIN_PCMPISTRS128
,
25217 IX86_BUILTIN_PCMPISTRZ128
,
25219 IX86_BUILTIN_PCMPGTQ
,
25221 /* AES instructions */
25222 IX86_BUILTIN_AESENC128
,
25223 IX86_BUILTIN_AESENCLAST128
,
25224 IX86_BUILTIN_AESDEC128
,
25225 IX86_BUILTIN_AESDECLAST128
,
25226 IX86_BUILTIN_AESIMC128
,
25227 IX86_BUILTIN_AESKEYGENASSIST128
,
25229 /* PCLMUL instruction */
25230 IX86_BUILTIN_PCLMULQDQ128
,
25233 IX86_BUILTIN_ADDPD256
,
25234 IX86_BUILTIN_ADDPS256
,
25235 IX86_BUILTIN_ADDSUBPD256
,
25236 IX86_BUILTIN_ADDSUBPS256
,
25237 IX86_BUILTIN_ANDPD256
,
25238 IX86_BUILTIN_ANDPS256
,
25239 IX86_BUILTIN_ANDNPD256
,
25240 IX86_BUILTIN_ANDNPS256
,
25241 IX86_BUILTIN_BLENDPD256
,
25242 IX86_BUILTIN_BLENDPS256
,
25243 IX86_BUILTIN_BLENDVPD256
,
25244 IX86_BUILTIN_BLENDVPS256
,
25245 IX86_BUILTIN_DIVPD256
,
25246 IX86_BUILTIN_DIVPS256
,
25247 IX86_BUILTIN_DPPS256
,
25248 IX86_BUILTIN_HADDPD256
,
25249 IX86_BUILTIN_HADDPS256
,
25250 IX86_BUILTIN_HSUBPD256
,
25251 IX86_BUILTIN_HSUBPS256
,
25252 IX86_BUILTIN_MAXPD256
,
25253 IX86_BUILTIN_MAXPS256
,
25254 IX86_BUILTIN_MINPD256
,
25255 IX86_BUILTIN_MINPS256
,
25256 IX86_BUILTIN_MULPD256
,
25257 IX86_BUILTIN_MULPS256
,
25258 IX86_BUILTIN_ORPD256
,
25259 IX86_BUILTIN_ORPS256
,
25260 IX86_BUILTIN_SHUFPD256
,
25261 IX86_BUILTIN_SHUFPS256
,
25262 IX86_BUILTIN_SUBPD256
,
25263 IX86_BUILTIN_SUBPS256
,
25264 IX86_BUILTIN_XORPD256
,
25265 IX86_BUILTIN_XORPS256
,
25266 IX86_BUILTIN_CMPSD
,
25267 IX86_BUILTIN_CMPSS
,
25268 IX86_BUILTIN_CMPPD
,
25269 IX86_BUILTIN_CMPPS
,
25270 IX86_BUILTIN_CMPPD256
,
25271 IX86_BUILTIN_CMPPS256
,
25272 IX86_BUILTIN_CVTDQ2PD256
,
25273 IX86_BUILTIN_CVTDQ2PS256
,
25274 IX86_BUILTIN_CVTPD2PS256
,
25275 IX86_BUILTIN_CVTPS2DQ256
,
25276 IX86_BUILTIN_CVTPS2PD256
,
25277 IX86_BUILTIN_CVTTPD2DQ256
,
25278 IX86_BUILTIN_CVTPD2DQ256
,
25279 IX86_BUILTIN_CVTTPS2DQ256
,
25280 IX86_BUILTIN_EXTRACTF128PD256
,
25281 IX86_BUILTIN_EXTRACTF128PS256
,
25282 IX86_BUILTIN_EXTRACTF128SI256
,
25283 IX86_BUILTIN_VZEROALL
,
25284 IX86_BUILTIN_VZEROUPPER
,
25285 IX86_BUILTIN_VPERMILVARPD
,
25286 IX86_BUILTIN_VPERMILVARPS
,
25287 IX86_BUILTIN_VPERMILVARPD256
,
25288 IX86_BUILTIN_VPERMILVARPS256
,
25289 IX86_BUILTIN_VPERMILPD
,
25290 IX86_BUILTIN_VPERMILPS
,
25291 IX86_BUILTIN_VPERMILPD256
,
25292 IX86_BUILTIN_VPERMILPS256
,
25293 IX86_BUILTIN_VPERMIL2PD
,
25294 IX86_BUILTIN_VPERMIL2PS
,
25295 IX86_BUILTIN_VPERMIL2PD256
,
25296 IX86_BUILTIN_VPERMIL2PS256
,
25297 IX86_BUILTIN_VPERM2F128PD256
,
25298 IX86_BUILTIN_VPERM2F128PS256
,
25299 IX86_BUILTIN_VPERM2F128SI256
,
25300 IX86_BUILTIN_VBROADCASTSS
,
25301 IX86_BUILTIN_VBROADCASTSD256
,
25302 IX86_BUILTIN_VBROADCASTSS256
,
25303 IX86_BUILTIN_VBROADCASTPD256
,
25304 IX86_BUILTIN_VBROADCASTPS256
,
25305 IX86_BUILTIN_VINSERTF128PD256
,
25306 IX86_BUILTIN_VINSERTF128PS256
,
25307 IX86_BUILTIN_VINSERTF128SI256
,
25308 IX86_BUILTIN_LOADUPD256
,
25309 IX86_BUILTIN_LOADUPS256
,
25310 IX86_BUILTIN_STOREUPD256
,
25311 IX86_BUILTIN_STOREUPS256
,
25312 IX86_BUILTIN_LDDQU256
,
25313 IX86_BUILTIN_MOVNTDQ256
,
25314 IX86_BUILTIN_MOVNTPD256
,
25315 IX86_BUILTIN_MOVNTPS256
,
25316 IX86_BUILTIN_LOADDQU256
,
25317 IX86_BUILTIN_STOREDQU256
,
25318 IX86_BUILTIN_MASKLOADPD
,
25319 IX86_BUILTIN_MASKLOADPS
,
25320 IX86_BUILTIN_MASKSTOREPD
,
25321 IX86_BUILTIN_MASKSTOREPS
,
25322 IX86_BUILTIN_MASKLOADPD256
,
25323 IX86_BUILTIN_MASKLOADPS256
,
25324 IX86_BUILTIN_MASKSTOREPD256
,
25325 IX86_BUILTIN_MASKSTOREPS256
,
25326 IX86_BUILTIN_MOVSHDUP256
,
25327 IX86_BUILTIN_MOVSLDUP256
,
25328 IX86_BUILTIN_MOVDDUP256
,
25330 IX86_BUILTIN_SQRTPD256
,
25331 IX86_BUILTIN_SQRTPS256
,
25332 IX86_BUILTIN_SQRTPS_NR256
,
25333 IX86_BUILTIN_RSQRTPS256
,
25334 IX86_BUILTIN_RSQRTPS_NR256
,
25336 IX86_BUILTIN_RCPPS256
,
25338 IX86_BUILTIN_ROUNDPD256
,
25339 IX86_BUILTIN_ROUNDPS256
,
25341 IX86_BUILTIN_FLOORPD256
,
25342 IX86_BUILTIN_CEILPD256
,
25343 IX86_BUILTIN_TRUNCPD256
,
25344 IX86_BUILTIN_RINTPD256
,
25345 IX86_BUILTIN_ROUNDPD_AZ256
,
25347 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
25348 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
25349 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
25351 IX86_BUILTIN_FLOORPS256
,
25352 IX86_BUILTIN_CEILPS256
,
25353 IX86_BUILTIN_TRUNCPS256
,
25354 IX86_BUILTIN_RINTPS256
,
25355 IX86_BUILTIN_ROUNDPS_AZ256
,
25357 IX86_BUILTIN_FLOORPS_SFIX256
,
25358 IX86_BUILTIN_CEILPS_SFIX256
,
25359 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
25361 IX86_BUILTIN_UNPCKHPD256
,
25362 IX86_BUILTIN_UNPCKLPD256
,
25363 IX86_BUILTIN_UNPCKHPS256
,
25364 IX86_BUILTIN_UNPCKLPS256
,
25366 IX86_BUILTIN_SI256_SI
,
25367 IX86_BUILTIN_PS256_PS
,
25368 IX86_BUILTIN_PD256_PD
,
25369 IX86_BUILTIN_SI_SI256
,
25370 IX86_BUILTIN_PS_PS256
,
25371 IX86_BUILTIN_PD_PD256
,
25373 IX86_BUILTIN_VTESTZPD
,
25374 IX86_BUILTIN_VTESTCPD
,
25375 IX86_BUILTIN_VTESTNZCPD
,
25376 IX86_BUILTIN_VTESTZPS
,
25377 IX86_BUILTIN_VTESTCPS
,
25378 IX86_BUILTIN_VTESTNZCPS
,
25379 IX86_BUILTIN_VTESTZPD256
,
25380 IX86_BUILTIN_VTESTCPD256
,
25381 IX86_BUILTIN_VTESTNZCPD256
,
25382 IX86_BUILTIN_VTESTZPS256
,
25383 IX86_BUILTIN_VTESTCPS256
,
25384 IX86_BUILTIN_VTESTNZCPS256
,
25385 IX86_BUILTIN_PTESTZ256
,
25386 IX86_BUILTIN_PTESTC256
,
25387 IX86_BUILTIN_PTESTNZC256
,
25389 IX86_BUILTIN_MOVMSKPD256
,
25390 IX86_BUILTIN_MOVMSKPS256
,
25393 IX86_BUILTIN_MPSADBW256
,
25394 IX86_BUILTIN_PABSB256
,
25395 IX86_BUILTIN_PABSW256
,
25396 IX86_BUILTIN_PABSD256
,
25397 IX86_BUILTIN_PACKSSDW256
,
25398 IX86_BUILTIN_PACKSSWB256
,
25399 IX86_BUILTIN_PACKUSDW256
,
25400 IX86_BUILTIN_PACKUSWB256
,
25401 IX86_BUILTIN_PADDB256
,
25402 IX86_BUILTIN_PADDW256
,
25403 IX86_BUILTIN_PADDD256
,
25404 IX86_BUILTIN_PADDQ256
,
25405 IX86_BUILTIN_PADDSB256
,
25406 IX86_BUILTIN_PADDSW256
,
25407 IX86_BUILTIN_PADDUSB256
,
25408 IX86_BUILTIN_PADDUSW256
,
25409 IX86_BUILTIN_PALIGNR256
,
25410 IX86_BUILTIN_AND256I
,
25411 IX86_BUILTIN_ANDNOT256I
,
25412 IX86_BUILTIN_PAVGB256
,
25413 IX86_BUILTIN_PAVGW256
,
25414 IX86_BUILTIN_PBLENDVB256
,
25415 IX86_BUILTIN_PBLENDVW256
,
25416 IX86_BUILTIN_PCMPEQB256
,
25417 IX86_BUILTIN_PCMPEQW256
,
25418 IX86_BUILTIN_PCMPEQD256
,
25419 IX86_BUILTIN_PCMPEQQ256
,
25420 IX86_BUILTIN_PCMPGTB256
,
25421 IX86_BUILTIN_PCMPGTW256
,
25422 IX86_BUILTIN_PCMPGTD256
,
25423 IX86_BUILTIN_PCMPGTQ256
,
25424 IX86_BUILTIN_PHADDW256
,
25425 IX86_BUILTIN_PHADDD256
,
25426 IX86_BUILTIN_PHADDSW256
,
25427 IX86_BUILTIN_PHSUBW256
,
25428 IX86_BUILTIN_PHSUBD256
,
25429 IX86_BUILTIN_PHSUBSW256
,
25430 IX86_BUILTIN_PMADDUBSW256
,
25431 IX86_BUILTIN_PMADDWD256
,
25432 IX86_BUILTIN_PMAXSB256
,
25433 IX86_BUILTIN_PMAXSW256
,
25434 IX86_BUILTIN_PMAXSD256
,
25435 IX86_BUILTIN_PMAXUB256
,
25436 IX86_BUILTIN_PMAXUW256
,
25437 IX86_BUILTIN_PMAXUD256
,
25438 IX86_BUILTIN_PMINSB256
,
25439 IX86_BUILTIN_PMINSW256
,
25440 IX86_BUILTIN_PMINSD256
,
25441 IX86_BUILTIN_PMINUB256
,
25442 IX86_BUILTIN_PMINUW256
,
25443 IX86_BUILTIN_PMINUD256
,
25444 IX86_BUILTIN_PMOVMSKB256
,
25445 IX86_BUILTIN_PMOVSXBW256
,
25446 IX86_BUILTIN_PMOVSXBD256
,
25447 IX86_BUILTIN_PMOVSXBQ256
,
25448 IX86_BUILTIN_PMOVSXWD256
,
25449 IX86_BUILTIN_PMOVSXWQ256
,
25450 IX86_BUILTIN_PMOVSXDQ256
,
25451 IX86_BUILTIN_PMOVZXBW256
,
25452 IX86_BUILTIN_PMOVZXBD256
,
25453 IX86_BUILTIN_PMOVZXBQ256
,
25454 IX86_BUILTIN_PMOVZXWD256
,
25455 IX86_BUILTIN_PMOVZXWQ256
,
25456 IX86_BUILTIN_PMOVZXDQ256
,
25457 IX86_BUILTIN_PMULDQ256
,
25458 IX86_BUILTIN_PMULHRSW256
,
25459 IX86_BUILTIN_PMULHUW256
,
25460 IX86_BUILTIN_PMULHW256
,
25461 IX86_BUILTIN_PMULLW256
,
25462 IX86_BUILTIN_PMULLD256
,
25463 IX86_BUILTIN_PMULUDQ256
,
25464 IX86_BUILTIN_POR256
,
25465 IX86_BUILTIN_PSADBW256
,
25466 IX86_BUILTIN_PSHUFB256
,
25467 IX86_BUILTIN_PSHUFD256
,
25468 IX86_BUILTIN_PSHUFHW256
,
25469 IX86_BUILTIN_PSHUFLW256
,
25470 IX86_BUILTIN_PSIGNB256
,
25471 IX86_BUILTIN_PSIGNW256
,
25472 IX86_BUILTIN_PSIGND256
,
25473 IX86_BUILTIN_PSLLDQI256
,
25474 IX86_BUILTIN_PSLLWI256
,
25475 IX86_BUILTIN_PSLLW256
,
25476 IX86_BUILTIN_PSLLDI256
,
25477 IX86_BUILTIN_PSLLD256
,
25478 IX86_BUILTIN_PSLLQI256
,
25479 IX86_BUILTIN_PSLLQ256
,
25480 IX86_BUILTIN_PSRAWI256
,
25481 IX86_BUILTIN_PSRAW256
,
25482 IX86_BUILTIN_PSRADI256
,
25483 IX86_BUILTIN_PSRAD256
,
25484 IX86_BUILTIN_PSRLDQI256
,
25485 IX86_BUILTIN_PSRLWI256
,
25486 IX86_BUILTIN_PSRLW256
,
25487 IX86_BUILTIN_PSRLDI256
,
25488 IX86_BUILTIN_PSRLD256
,
25489 IX86_BUILTIN_PSRLQI256
,
25490 IX86_BUILTIN_PSRLQ256
,
25491 IX86_BUILTIN_PSUBB256
,
25492 IX86_BUILTIN_PSUBW256
,
25493 IX86_BUILTIN_PSUBD256
,
25494 IX86_BUILTIN_PSUBQ256
,
25495 IX86_BUILTIN_PSUBSB256
,
25496 IX86_BUILTIN_PSUBSW256
,
25497 IX86_BUILTIN_PSUBUSB256
,
25498 IX86_BUILTIN_PSUBUSW256
,
25499 IX86_BUILTIN_PUNPCKHBW256
,
25500 IX86_BUILTIN_PUNPCKHWD256
,
25501 IX86_BUILTIN_PUNPCKHDQ256
,
25502 IX86_BUILTIN_PUNPCKHQDQ256
,
25503 IX86_BUILTIN_PUNPCKLBW256
,
25504 IX86_BUILTIN_PUNPCKLWD256
,
25505 IX86_BUILTIN_PUNPCKLDQ256
,
25506 IX86_BUILTIN_PUNPCKLQDQ256
,
25507 IX86_BUILTIN_PXOR256
,
25508 IX86_BUILTIN_MOVNTDQA256
,
25509 IX86_BUILTIN_VBROADCASTSS_PS
,
25510 IX86_BUILTIN_VBROADCASTSS_PS256
,
25511 IX86_BUILTIN_VBROADCASTSD_PD256
,
25512 IX86_BUILTIN_VBROADCASTSI256
,
25513 IX86_BUILTIN_PBLENDD256
,
25514 IX86_BUILTIN_PBLENDD128
,
25515 IX86_BUILTIN_PBROADCASTB256
,
25516 IX86_BUILTIN_PBROADCASTW256
,
25517 IX86_BUILTIN_PBROADCASTD256
,
25518 IX86_BUILTIN_PBROADCASTQ256
,
25519 IX86_BUILTIN_PBROADCASTB128
,
25520 IX86_BUILTIN_PBROADCASTW128
,
25521 IX86_BUILTIN_PBROADCASTD128
,
25522 IX86_BUILTIN_PBROADCASTQ128
,
25523 IX86_BUILTIN_VPERMVARSI256
,
25524 IX86_BUILTIN_VPERMDF256
,
25525 IX86_BUILTIN_VPERMVARSF256
,
25526 IX86_BUILTIN_VPERMDI256
,
25527 IX86_BUILTIN_VPERMTI256
,
25528 IX86_BUILTIN_VEXTRACT128I256
,
25529 IX86_BUILTIN_VINSERT128I256
,
25530 IX86_BUILTIN_MASKLOADD
,
25531 IX86_BUILTIN_MASKLOADQ
,
25532 IX86_BUILTIN_MASKLOADD256
,
25533 IX86_BUILTIN_MASKLOADQ256
,
25534 IX86_BUILTIN_MASKSTORED
,
25535 IX86_BUILTIN_MASKSTOREQ
,
25536 IX86_BUILTIN_MASKSTORED256
,
25537 IX86_BUILTIN_MASKSTOREQ256
,
25538 IX86_BUILTIN_PSLLVV4DI
,
25539 IX86_BUILTIN_PSLLVV2DI
,
25540 IX86_BUILTIN_PSLLVV8SI
,
25541 IX86_BUILTIN_PSLLVV4SI
,
25542 IX86_BUILTIN_PSRAVV8SI
,
25543 IX86_BUILTIN_PSRAVV4SI
,
25544 IX86_BUILTIN_PSRLVV4DI
,
25545 IX86_BUILTIN_PSRLVV2DI
,
25546 IX86_BUILTIN_PSRLVV8SI
,
25547 IX86_BUILTIN_PSRLVV4SI
,
25549 IX86_BUILTIN_GATHERSIV2DF
,
25550 IX86_BUILTIN_GATHERSIV4DF
,
25551 IX86_BUILTIN_GATHERDIV2DF
,
25552 IX86_BUILTIN_GATHERDIV4DF
,
25553 IX86_BUILTIN_GATHERSIV4SF
,
25554 IX86_BUILTIN_GATHERSIV8SF
,
25555 IX86_BUILTIN_GATHERDIV4SF
,
25556 IX86_BUILTIN_GATHERDIV8SF
,
25557 IX86_BUILTIN_GATHERSIV2DI
,
25558 IX86_BUILTIN_GATHERSIV4DI
,
25559 IX86_BUILTIN_GATHERDIV2DI
,
25560 IX86_BUILTIN_GATHERDIV4DI
,
25561 IX86_BUILTIN_GATHERSIV4SI
,
25562 IX86_BUILTIN_GATHERSIV8SI
,
25563 IX86_BUILTIN_GATHERDIV4SI
,
25564 IX86_BUILTIN_GATHERDIV8SI
,
25566 /* Alternate 4 element gather for the vectorizer where
25567 all operands are 32-byte wide. */
25568 IX86_BUILTIN_GATHERALTSIV4DF
,
25569 IX86_BUILTIN_GATHERALTDIV8SF
,
25570 IX86_BUILTIN_GATHERALTSIV4DI
,
25571 IX86_BUILTIN_GATHERALTDIV8SI
,
25573 /* TFmode support builtins. */
25575 IX86_BUILTIN_HUGE_VALQ
,
25576 IX86_BUILTIN_FABSQ
,
25577 IX86_BUILTIN_COPYSIGNQ
,
25579 /* Vectorizer support builtins. */
25580 IX86_BUILTIN_CPYSGNPS
,
25581 IX86_BUILTIN_CPYSGNPD
,
25582 IX86_BUILTIN_CPYSGNPS256
,
25583 IX86_BUILTIN_CPYSGNPD256
,
25585 /* FMA4 instructions. */
25586 IX86_BUILTIN_VFMADDSS
,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
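
/* A minimal usage sketch (added commentary, not part of the original code):
   the bdesc_* tables below are registered through these helpers, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
			V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);

   If SSE2 is not in ix86_isa_flags and the front end has no extended-scope
   hook, no decl is built yet; the description is parked in ix86_builtins_isa
   and the const_p mark is remembered for when the decl is built later.  */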

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
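
/* Illustrative note (an assumption about the callers, not original text):
   this is the hook through which deferred builtins appear when function
   specific target options enable a new ISA, e.g. compiling one function
   with __attribute__((target ("avx"))) while the translation unit default
   lacks AVX.  */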

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
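
/* Added commentary (not in the original sources): COMPARISON carries the
   rtx code an entry expands to, and BUILTIN_DESC_SWAP_OPERANDS (or a
   *_SWAP function type in the tables below) marks comparisons the hardware
   only supports with reversed operands.  For example, in bdesc_args
   __builtin_ia32_cmpgtps is listed with LT and V4SF_FTYPE_V4SF_V4SF_SWAP,
   i.e. a > b is emitted as b < a.  */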

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
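
/* Added commentary (not in the original sources): "special" entries take
   pointer operands or return void, so their expansion must cope with memory
   references; e.g. __builtin_ia32_movntps maps to CODE_FOR_sse_movntv4sf
   with the VOID_FTYPE_PFLOAT_V4SF signature recorded above.  */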

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26420 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26424 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26426 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26428 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
26433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
26434 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
26438 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
26439 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
26440 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
26441 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
26443 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26444 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26445 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26446 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26447 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26455 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26456 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26459 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26460 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26461 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26462 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26464 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
26465 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26466 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26468 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
26470 { OPTION_MASK_ISA_SSE2
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26471 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26473 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
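  /* SSE2 MMX: 64-bit V1DI add/sub handled via MMX patterns.  */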
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
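  /* SSE3 */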
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
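  /* SSSE3 */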
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
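  /* SSE4.1 */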
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
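  /* SSE4.1 ROUNDPD/ROUNDPS variants (OPTION_MASK_ISA_ROUND).  The
     comparison-code field is reused to carry the ROUND_* mode constant.  */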
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
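  /* PTEST; the rtx code distinguishes the ptestz/ptestc/ptestnzc forms.  */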
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
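  /* SSE4.2 */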
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
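  /* SSE4A */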
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
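  /* AES */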
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
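  /* PCLMUL */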
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
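  /* AVX */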
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
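  /* AVX2 */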
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
26853 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26854 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26855 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26856 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
26857 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26858 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26859 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26860 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26861 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
26862 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
26863 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26864 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26865 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26866 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26867 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26868 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26869 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26870 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26871 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26872 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26873 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26874 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26875 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26876 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26877 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26878 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26879 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26880 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26881 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
26882 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
26883 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
26884 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
26885 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
26886 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
26887 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
26888 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
26889 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
26890 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
26891 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26892 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
26893 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26894 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26895 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26896 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26897 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
26898 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
26899 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
26900 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
26901 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26902 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26903 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26904 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26905 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26906 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26907 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26908 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
  /* LZCNT */
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
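  /* PDEP deposits the low bits of its first operand into the set-bit
     positions of the mask operand; PEXT is the inverse, gathering the
     mask-selected bits down to the low end.  A worked example (an
     illustration only, not taken from this file):
     pdep (0b101, 0b11010) == 0b10010, and pext (0b10010, 0b11010) == 0b101.  */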
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
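/* A decoding of the MULTI_ARG_* shorthand above, inferred from the
   expansions themselves rather than from any separate documentation: the
   digit is the operand count; the element tag (SF/DF/DI/SI/HI/QI) names the
   128-bit vector element type; a trailing "2" marks the 256-bit widening of
   the same shape; _IMM takes an integer immediate; _CMP carries an rtx
   comparison code; and _TF marks the always-false/always-true comparison
   forms.  For example, MULTI_ARG_3_SF2 is V8SF_FTYPE_V8SF_V8SF_V8SF: three
   256-bit float vector operands.  */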
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
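  /* For the vpcom* entries below, the otherwise-unused comparison field of
     builtin_description carries the rtx comparison code (EQ, NE, LT, ...)
     that the expander folds into the XOP condition immediate; the "ne" and
     "neq" spellings are deliberate aliases mapping to the same
     IX86_BUILTIN_* code.  This reading is inferred from the table itself,
     not from separate documentation.  */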
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  */
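/* Field order, for reference (mirroring the initializers below rather than
   restating any separate spec): ISA mask, insn code (CODE_FOR_nothing here,
   since these builtins are not expanded from a single insn pattern),
   builtin name, builtin code, comparison code, and a flag that carries the
   function type.  */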
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}
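/* Both callbacks key their switch on TYPE_SIZE in bits, so the 64/128/256
   cases line up with the _ITM_*M64/M128/M256 entry points in bdesc_tm
   above.  */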
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}

/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
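  /* AVX2 gather builtins.  Reading the FTYPE signatures below, the operand
     order appears to be: merge source, base pointer, index vector, mask,
     and scale immediate -- an inference from the types, matching the usual
     vgather operand layout, rather than a restatement of separate
     documentation.  */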
27470 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
27471 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
27472 IX86_BUILTIN_GATHERSIV2DF
);
27474 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
27475 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
27476 IX86_BUILTIN_GATHERSIV4DF
);
27478 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
27479 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
27480 IX86_BUILTIN_GATHERDIV2DF
);
27482 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
27483 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
27484 IX86_BUILTIN_GATHERDIV4DF
);
27486 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
27487 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
27488 IX86_BUILTIN_GATHERSIV4SF
);
27490 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
27491 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
27492 IX86_BUILTIN_GATHERSIV8SF
);
27494 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
27495 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
27496 IX86_BUILTIN_GATHERDIV4SF
);
27498 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
27499 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
27500 IX86_BUILTIN_GATHERDIV8SF
);
27502 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
27503 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
27504 IX86_BUILTIN_GATHERSIV2DI
);
27506 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
27507 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
27508 IX86_BUILTIN_GATHERSIV4DI
);
27510 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
27511 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
27512 IX86_BUILTIN_GATHERDIV2DI
);
27514 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
27515 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
27516 IX86_BUILTIN_GATHERDIV4DI
);
27518 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
27519 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
27520 IX86_BUILTIN_GATHERSIV4SI
);
27522 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
27523 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
27524 IX86_BUILTIN_GATHERSIV8SI
);
27526 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
27527 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
27528 IX86_BUILTIN_GATHERDIV4SI
);
27530 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
27531 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
27532 IX86_BUILTIN_GATHERDIV8SI
);
27534 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
27535 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
27536 IX86_BUILTIN_GATHERALTSIV4DF
);
27538 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
27539 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
27540 IX86_BUILTIN_GATHERALTDIV8SF
);
27542 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
27543 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
27544 IX86_BUILTIN_GATHERALTSIV4DI
);
27546 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
27547 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
27548 IX86_BUILTIN_GATHERALTDIV8SI
);
27551 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
27552 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
27554 /* MMX access to the vec_init patterns. */
27555 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
27556 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
27558 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
27559 V4HI_FTYPE_HI_HI_HI_HI
,
27560 IX86_BUILTIN_VEC_INIT_V4HI
);
27562 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
27563 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
27564 IX86_BUILTIN_VEC_INIT_V8QI
);
27566 /* Access to the vec_extract patterns. */
27567 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
27568 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
27569 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
27570 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
27571 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
27572 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
27573 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
27574 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
27575 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
27576 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
27578 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27579 "__builtin_ia32_vec_ext_v4hi",
27580 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
27582 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
27583 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
27585 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
27586 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
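
  /* Editorial note -- illustration only, not from the original source:
     a vec_set builtin returns a copy of the vector with one element
     replaced, e.g.

         __v8hi v2 = __builtin_ia32_vec_set_v8hi (v1, 100, 3);

     leaves V1 itself unmodified; see the comment in
     ix86_expand_vec_set_builtin below.  */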

  /* Add the FMA4/XOP multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
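
/* Editorial note -- hypothetical user code, illustration only: on 64-bit
   targets these builtins let a function of one calling convention walk
   the variable arguments of the other, e.g.

       __attribute__((ms_abi)) void
       f (int n, ...)
       {
         __builtin_ms_va_list ap;
         __builtin_ms_va_start (ap, n);
         ...
         __builtin_ms_va_end (ap);
       }

   The fnattr_ms/fnattr_sysv attribute lists above tag each builtin with
   the ABI whose va_list layout it expects.  */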

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
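
/* Editorial note -- illustration only, not from the original source:
   after the registrations above, user code on x86 can write

       __float80  x = 1.0w;    -- XFmode, 80-bit extended precision
       __float128 y = 1.0q;    -- TFmode, 128-bit

   (the w/q literal suffixes are front-end specific; this is a sketch,
   not a claim about every front end).  */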

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
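
/* Editorial note -- illustration only: after a front-end error such as a
   wrong-type argument, expand_expr can hand back const0_rtx (a scalar
   zero) where e.g. V4SFmode was expected; the rewrite above to
   CONST0_RTX (V4SFmode) substitutes a zero vector of the right mode, so
   the expanders below can keep going instead of crashing.  */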

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
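
/* Editorial note -- illustration only, names are a sketch: a call such as

       __v4si r = __builtin_ia32_paddd128 (a, b);

   reaches ix86_expand_binop_builtin with the insn code of the matching
   two-operand vector pattern; each operand is forced into a register
   only when the pattern's predicate rejects it, and a single insn is
   emitted.  */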

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                      goto non_constant;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
        non_constant:
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
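
/* Editorial note -- illustration only: SSE encodes only one direction of
   each ordered comparison, so a descriptor flagged for swapping makes
   e.g. a greater-than builtin reuse the less-than pattern with its
   operands exchanged.  The SWAP path above copies op1 into a fresh
   register first so the caller's operand RTL is not clobbered.  */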

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
                                     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or
             register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_avx2_inserti128:
              case CODE_FOR_avx2_extracti128:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:

              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:

              case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
              case CODE_FOR_sse4_1_roundps_sfix:
              case CODE_FOR_avx_roundpd_vec_pack_sfix256:
              case CODE_FOR_avx_roundps_sfix256:

              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vmcmpv2df3:
              case CODE_FOR_avx_vmcmpv4sf3:
              case CODE_FOR_avx_cmpv2df3:
              case CODE_FOR_avx_cmpv4sf3:
              case CODE_FOR_avx_cmpv4df3:
              case CODE_FOR_avx_cmpv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
        target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
        {
          if (GET_MODE (op) != Pmode)
            op = convert_to_mode (Pmode, op, 1);
          target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
        }
      else
        target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
        {
          if (!match)
            {
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
              else
                error ("the last argument must be an 8-bit immediate");
              return const0_rtx;
            }
        }
      else
        {
          if (i == memory)
            {
              /* This must be the memory operand.  */
              if (GET_MODE (op) != Pmode)
                op = convert_to_mode (Pmode, op, 1);
              op = gen_rtx_MEM (mode, force_reg (Pmode, op));
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
            }
          else
            {
              /* This must be register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
              op = copy_to_mode_reg (mode, op);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
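
/* Editorial note -- illustration only: for a V4SF vector
   TYPE_VECTOR_SUBPARTS is 4, so selectors 0..3 are accepted and e.g.

       __builtin_ia32_vec_ext_v4sf (v, 7);

   draws the "selector must be an integer constant in the range 0..3"
   error above and falls back to element 0.  */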

/* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
                                       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
      return const0_rtx;
    }

  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
        op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }

    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
          || !insn_data[icode].operand[0].predicate (target, Pmode))
        target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
:
29498 case IX86_BUILTIN_BEXTRI64
:
29499 arg0
= CALL_EXPR_ARG (exp
, 0);
29500 arg1
= CALL_EXPR_ARG (exp
, 1);
29501 op0
= expand_normal (arg0
);
29502 op1
= expand_normal (arg1
);
29503 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
29504 ? CODE_FOR_tbm_bextri_si
29505 : CODE_FOR_tbm_bextri_di
);
29506 if (!CONST_INT_P (op1
))
29508 error ("last argument must be an immediate");
29513 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
29514 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
29515 op1
= GEN_INT (length
);
29516 op2
= GEN_INT (lsb_index
);
29517 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
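
      /* Editorial note -- illustration only: the TBM bextri immediate
         packs two fields, bits [7:0] = starting bit index and bits
         [15:8] = field length, so

             __builtin_ia32_bextri_u32 (x, (8 << 8) | 4);

         extracts 8 bits of X starting at bit 4.  The code above splits
         the packed immediate into the two separate insn operands.  */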

    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
        {
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_zero_extendhisi2 (op2, op0));
        }
      else if (mode0 == SImode)
        op2 = op0;
      else
        op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
        target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
29573 case IX86_BUILTIN_GATHERSIV2DF
:
29574 icode
= CODE_FOR_avx2_gathersiv2df
;
29576 case IX86_BUILTIN_GATHERSIV4DF
:
29577 icode
= CODE_FOR_avx2_gathersiv4df
;
29579 case IX86_BUILTIN_GATHERDIV2DF
:
29580 icode
= CODE_FOR_avx2_gatherdiv2df
;
29582 case IX86_BUILTIN_GATHERDIV4DF
:
29583 icode
= CODE_FOR_avx2_gatherdiv4df
;
29585 case IX86_BUILTIN_GATHERSIV4SF
:
29586 icode
= CODE_FOR_avx2_gathersiv4sf
;
29588 case IX86_BUILTIN_GATHERSIV8SF
:
29589 icode
= CODE_FOR_avx2_gathersiv8sf
;
29591 case IX86_BUILTIN_GATHERDIV4SF
:
29592 icode
= CODE_FOR_avx2_gatherdiv4sf
;
29594 case IX86_BUILTIN_GATHERDIV8SF
:
29595 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29597 case IX86_BUILTIN_GATHERSIV2DI
:
29598 icode
= CODE_FOR_avx2_gathersiv2di
;
29600 case IX86_BUILTIN_GATHERSIV4DI
:
29601 icode
= CODE_FOR_avx2_gathersiv4di
;
29603 case IX86_BUILTIN_GATHERDIV2DI
:
29604 icode
= CODE_FOR_avx2_gatherdiv2di
;
29606 case IX86_BUILTIN_GATHERDIV4DI
:
29607 icode
= CODE_FOR_avx2_gatherdiv4di
;
29609 case IX86_BUILTIN_GATHERSIV4SI
:
29610 icode
= CODE_FOR_avx2_gathersiv4si
;
29612 case IX86_BUILTIN_GATHERSIV8SI
:
29613 icode
= CODE_FOR_avx2_gathersiv8si
;
29615 case IX86_BUILTIN_GATHERDIV4SI
:
29616 icode
= CODE_FOR_avx2_gatherdiv4si
;
29618 case IX86_BUILTIN_GATHERDIV8SI
:
29619 icode
= CODE_FOR_avx2_gatherdiv8si
;
29621 case IX86_BUILTIN_GATHERALTSIV4DF
:
29622 icode
= CODE_FOR_avx2_gathersiv4df
;
29624 case IX86_BUILTIN_GATHERALTDIV8SF
:
29625 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29627 case IX86_BUILTIN_GATHERALTSIV4DI
:
29628 icode
= CODE_FOR_avx2_gathersiv4di
;
29630 case IX86_BUILTIN_GATHERALTDIV8SI
:
29631 icode
= CODE_FOR_avx2_gatherdiv8si
;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
          || GET_MODE (target) != insn_data[icode].operand[0].mode)
        subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
        subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
          || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
        {
          rtx half = gen_reg_rtx (V4SImode);
          if (!nonimmediate_operand (op2, V8SImode))
            op2 = copy_to_mode_reg (V8SImode, op2);
          emit_insn (gen_vec_extract_lo_v8si (half, op2));
          op2 = half;
        }
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
               || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
        {
          rtx (*gen) (rtx, rtx);
          rtx half = gen_reg_rtx (mode0);
          if (mode0 == V4SFmode)
            gen = gen_vec_extract_lo_v8sf;
          else
            gen = gen_vec_extract_lo_v8si;
          if (!nonimmediate_operand (op0, GET_MODE (op0)))
            op0 = copy_to_mode_reg (GET_MODE (op0), op0);
          emit_insn (gen (half, op0));
          op0 = half;
          if (!nonimmediate_operand (op3, GET_MODE (op3)))
            op3 = copy_to_mode_reg (GET_MODE (op3), op3);
          emit_insn (gen (half, op3));
          op3 = half;
        }

      /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
      if (GET_MODE (op1) != Pmode)
        op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }

      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;
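
      /* Illustrative sketch (user-level view, not part of this
         function): the pc_rtx mask optimization above fires for code
         such as

             __m256d r = _mm256_i32gather_pd (base, idx, 8);

         whose unmasked wrapper passes an all-ones mask built by
         comparing a zero vector with itself; once every mask element
         is known negative, op0 becomes pc_rtx and the pattern may
         clobber the whole destination.  */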
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
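      /* Illustrative note: this backs the RTM intrinsic _xabort (imm8).
         Because the xabort instruction only has an immediate form, a
         call like _xabort (0xff) must present a literal 8-bit constant;
         anything else is rejected by the predicate check above.  */
      return 0;

    default:
      break;
    }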
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE2)
            /* Emit a normal call if SSE2 isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
        }
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
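
/* Illustrative sketch (assumed user code, not from this file): with
   -O3 -mavx on x86_64 a loop such as

       for (i = 0; i < n; i++)
         out[i] = sqrt (in[i]);    // double arrays

   reaches the BUILT_IN_SQRT case above with out_mode == DFmode and
   out_n == 4, and is vectorized via IX86_BUILTIN_SQRTPD256 (vsqrtpd
   on a 256-bit register).  */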
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
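
/* Worked example of the SVML mangling above: for BUILT_IN_SINF,
   bname is "__builtin_sinf" so bname+10 is "sinf"; the n == 4 branch
   produces "vmlssinf", the last character is overwritten with '4' and
   name[4] is uppercased, yielding "vmlsSin4".  The double variant of
   sin becomes "vmldSin2", while log is special-cased to "vmldLn2".  */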
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
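
/* Worked example of the ACML mangling above: for BUILT_IN_SIN the
   template "__vr.._" becomes "__vrd2_" (name[4] = 'd', name[5] = '2')
   and bname+10 appends "sin", giving "__vrd2_sin"; the float variant
   yields "__vrs4_sinf".  */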
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
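
/* Note on the "ALT" entries above: they cover the mixed-width cases
   where the index vector has a different element count than the data
   vector.  E.g. a V4DF gather with a V4SI index maps to
   IX86_BUILTIN_GATHERALTSIV4DF, whose expander extracts the low half
   of a V8SI index operand before emitting the gather.  */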
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
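
/* The guard above is what makes the substitution safe: under
   -ffast-math style flags, sqrtf (x) may be rewritten using the
   rsqrt approximation (roughly x * rsqrtss (x) refined by a
   Newton-Raphson step -- the "_NR" builtins), which changes rounding
   and ignores IEEE special cases; hence the checks for
   flag_finite_math_only, !flag_trapping_math and
   flag_unsafe_math_optimizations.  */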
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
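
/* Worked example for the V8SF path above: the parallel
   [1 0 3 2 5 4 7 6] satisfies the cross-lane mirror test
   (ipar[i] + 4 == ipar[i + 4]), nelt is clamped to 4, and the
   128-bit loop packs 2-bit fields: 1 | (0 << 2) | (3 << 4) | (2 << 6)
   = 0xb1, the classic "swap adjacent pairs" vpermilps immediate; the
   function returns 0xb1 + 1.  */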
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
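
/* Worked example: for V8SF the parallel [4 5 6 7 12 13 14 15]
   describes "high half of operand 1, high half of operand 2"; the two
   half indices are 4/4 = 1 and 12/4 = 3, so the mask is
   1 | (3 << 4) = 0x31 and the function returns 0x32.  */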
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
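
/* Note: on 64-bit targets with a red zone none of this is needed --
   ix86_force_to_memory stored the value at sp - RED_ZONE_SIZE, inside
   the 128-byte scratch area the psABI guarantees below the stack
   pointer, so there is nothing to deallocate here.  */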
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
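
/* Example of the Q_REGS narrowing above: on ia32 a non-constant
   QImode value reloaded into GENERAL_REGS is narrowed to Q_REGS,
   i.e. %eax/%ebx/%ecx/%edx, the only registers whose low byte
   (%al ... %dl) is directly addressable.  */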
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
          || rclass == LEGACY_REGS
          || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
        regno = REGNO (x);
      else
        regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
        regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
        return true;

      default:
        break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                                enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
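
/* For example, copying a DFmode value between %st(0) and %xmm0 always
   goes through a stack slot on this port: FLOAT_CLASS_P differs for
   the two classes, so the test above reports secondary memory, and
   ix86_register_move_cost below prices the copy as a store plus a
   load.  */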
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
                         int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
          case SFmode:
            index = 0;
            break;
          case DFmode:
            index = 1;
            break;
          case XFmode:
            index = 2;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          case 16:
            index = 2;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (Q_CLASS_P (regclass) || TARGET_64BIT)
          {
            if (!in)
              return ix86_cost->int_store[0];
            if (TARGET_PARTIAL_REG_DEPENDENCY
                && optimize_function_for_speed_p (cfun))
              cost = ix86_cost->movzbl_load;
            else
              cost = ix86_cost->int_load[0];
            if (in == 2)
              return MAX (cost, ix86_cost->int_store[0]);
            return cost;
          }
        else
          {
            if (in == 2)
              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
            if (in)
              return ix86_cost->movzbl_load;
            else
              return ix86_cost->int_store[0] + 4;
          }
        break;
      case 2:
        if (in == 2)
          return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
        return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
        /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
        if (mode == TFmode)
          mode = XFmode;
        if (in == 2)
          cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
        else if (in)
          cost = ix86_cost->int_load[2];
        else
          cost = ix86_cost->int_store[2];
        return (cost * (((int) GET_MODE_SIZE (mode)
                        + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
                       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
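
/* Example: an SImode copy between SSE_REGS and GENERAL_REGS is priced
   at MAX (8, mmxsse_to_integer) above -- well beyond the cost of 2
   for a plain integer-to-integer move -- which keeps the allocator
   from bouncing scalar integers through the vector unit.  */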
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
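
/* Example of the asymmetry above: ix86_modes_tieable_p (SFmode,
   DFmode) is true -- any register home good for DFmode can also hold
   SFmode -- while ix86_modes_tieable_p (DFmode, SFmode) falls through
   every rule above and conservatively answers false.  */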
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
                bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        {
          *total = 0;
          return true;
        }
      switch (standard_80387_constant_p (x))
        {
        case 1: /* 0.0 */
          *total = 1;
          return true;
        default: /* Other constants */
          *total = 2;
          return true;
        case 0:
        case -1:
          break;
        }
      /* Start with (MEM (SYMBOL_REF)), since that's where
         it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
                + (flag_pic != 0 && !TARGET_64BIT)
                + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = cost->add;
      else
        *total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && cost->lea <= cost->shift_const)
            {
              *total = cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (CONST_INT_P (XEXP (x, 1)))
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = cost->shift_var * 2;
              else
                *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (CONST_INT_P (XEXP (x, 1)))
            *total = cost->shift_const;
          else
            *total = cost->shift_var;
        }
      return false;

    case FMA:
      {
        rtx sub;

        gcc_assert (FLOAT_MODE_P (mode));
        gcc_assert (TARGET_FMA || TARGET_FMA4);

        /* ??? SSE scalar/vector cost should be used here.  */
        /* ??? Bald assumption that fma has the same cost as fmul.  */
        *total = cost->fmul;
        *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
        sub = XEXP (x, 0);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 0, speed);

        sub = XEXP (x, 2);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 2, speed);
        return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE scalar cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fmul;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (cost->mult_init[MODE_INDEX (mode)]
                    + nbits * cost->mult_bit
                    + rtx_cost (op0, outer_code, opno, speed)
                    + rtx_cost (op1, outer_code, opno, speed));

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fdiv;
      else
        *total = cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fadd;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fchs;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = cost->add * 2;
      else
        *total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && CONST_INT_P (XEXP (XEXP (x, 0), 2))
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
                    + rtx_cost (const1_rtx, outer_code, opno, speed));
          return true;
        }
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
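
/* Example of the lea pricing in the PLUS case above: an address-form
   expression such as (plus (mult reg 4) reg2), as encoded by
   "leal (%ebx,%eax,4), %ecx", is charged a single cost->lea rather
   than a shift plus an add, because the scale 4 passes the
   val == 2 || val == 4 || val == 8 test.  */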
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
               label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
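
/* The resulting order thus tries caller-saved integer registers
   (e.g. %eax, %ecx, %edx) before call-preserved ones, then whichever
   FP bank is doing the math, so that short-lived values land in
   registers that need no save/restore in the prologue.  */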
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */
static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
                                         tree args,
                                         int flags ATTRIBUTE_UNUSED,
                                         bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
                   name);
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, 0) != 0
               && compare_tree_int (cst, 1) != 0)
        {
          warning (OPT_Wattributes,
                   "argument to %qE attribute is neither zero, nor one",
                   name);
          *no_add_attrs = true;
        }

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	error ("ms_abi and sysv_abi attributes are not compatible");

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	error ("ms_abi and sysv_abi attributes are not compatible");

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Happens only in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
      return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops[i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
				 const1_rtx);
      else
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
34341 static enum machine_mode
34342 ix86_c_mode_for_suffix (char suffix
)
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
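
/* Illustrative plain-C sketch (not GCC code) of one Newton-Raphson
   refinement of the hardware reciprocal estimate used above:
   x1 = x0 * (2 - b * x0) = 2*x0 - b*x0*x0.  Names are hypothetical;
   rcp_b stands for the rcpps/rcpss estimate of 1/b.  */
static float
swdiv_sketch (float a, float b, float rcp_b)
{
  float e0 = rcp_b * b * rcp_b;	/* b * x0 * x0 */
  float e1 = rcp_b + rcp_b;	/* 2 * x0 */
  return a * (e1 - e0);		/* a * refined 1/b */
}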
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
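
/* Illustrative plain-C sketch (not GCC code) of the refinement above:
   rsqrt(a) ~= -0.5 * x0 * (a*x0*x0 - 3) and sqrt(a) ~= -0.5 * (a*x0) *
   (a*x0*x0 - 3), with x0 the rsqrtss estimate.  Names are hypothetical.  */
static float
swsqrt_sketch (float a, float x0, int recip)
{
  float e0 = x0 * a;			/* a * x0 */
  float e1 = e0 * x0;			/* a * x0 * x0 */
  float e2 = e1 - 3.0f;			/* a * x0 * x0 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* -.5 * x0 or -.5 * (a * x0) */
  return e2 * e3;			/* rsqrt(a) or sqrt(a) */
}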
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
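
/* Illustrative plain-C sketch (not GCC code) of the classic add/subtract
   2^52 trick that the expanders below build on: adding and removing 2^52
   forces the fraction bits out of a non-negative double, rounding it to an
   integer under the current (nearest-even) rounding mode.  The function
   name is hypothetical.  */
static double
two52_round_sketch (double xa)	/* requires 0.0 <= xa < 2^52 */
{
  const double TWO52 = 4503599627370496.0;	/* 0x1p52 */
  return (xa + TWO52) - TWO52;
}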
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	op0 = (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
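
/* Illustrative plain-C sketch (not GCC code) of the sequence above.  The
   rounding constant is nextafter (0.5, 0.0) = 0.5 - 2^-(p+1) rather than
   0.5, so that inputs just below a halfway point are not pushed over it by
   the addition before the truncating conversion.  Name is hypothetical.  */
#include <math.h>
static long
lround_sketch (double x)
{
  double pred_half = nextafter (0.5, 0.0);
  return (long) (x + copysign (pred_half, x));	/* cvttsd2si truncates */
}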
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
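
/* Illustrative plain-C sketch (not GCC code) of the compensation above:
   the 2^52 trick rounds to nearest-even, so halfway cases are nudged to
   give round()'s half-away-from-zero semantics.  Name is hypothetical.  */
#include <math.h>
static double
rounddf32_sketch (double x)
{
  const double TWO52 = 4503599627370496.0;
  double xa = fabs (x), xa2, dxa;
  if (!(xa < TWO52))
    return x;
  xa2 = (xa + TWO52) - TWO52;	/* nearest-even integer of xa */
  dxa = xa2 - xa;		/* in [-0.5, 0.5] */
  xa2 -= (dxa > 0.5) ? 1.0 : 0.0;
  xa2 += (dxa <= -0.5) ? 1.0 : 0.0;
  return copysign (xa2, x);
}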
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
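
/* Illustrative plain-C sketch (not GCC code) of the SSE4.1 sequence above,
   where roundsd/roundss with the ROUND_TRUNC immediate performs the
   truncation.  Name is hypothetical.  */
#include <math.h>
static double
round_sse4_sketch (double a)
{
  double pred_half = nextafter (0.5, 0.0);
  return trunc (a + copysign (pred_half, a));
}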
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype ATTRIBUTE_UNUSED,
				 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      default:
        gcc_unreachable ();
    }
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
							const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
		unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt,
			bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_lowpart (vmode, target);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
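
/* Hedged sketch (not GCC code): how the immediate blend mask above is
   derived.  Bit I of the mask selects the second operand when result
   element I comes from op1 (permutation index >= nelt).  */
static unsigned
blend_mask_sketch (const unsigned char *perm, unsigned nelt)
{
  unsigned i, mask = 0;
  for (i = 0; i < nelt; ++i)
    mask |= (unsigned) (perm[i] >= nelt) << i;
  return mask;
}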
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
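
/* Hedged sketch (not GCC code) restating the chunk test above: viewed in
   the wider mode, each CHUNK-sized group must start on a chunk-aligned
   source element and the group must be consecutive.  Name is hypothetical.  */
static int
perm_ok_in_wider_mode_sketch (const unsigned char *perm, unsigned nelt,
			      unsigned chunk)
{
  unsigned i, j;
  for (i = 0; i < nelt; i += chunk)
    {
      if (perm[i] & (chunk - 1))	/* chunk must be aligned...  */
	return 0;
      for (j = 1; j < chunk; ++j)
	if (perm[i] + j != perm[i + j])	/* ...and consecutive.  */
	  return 0;
    }
  return 1;
}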
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (d->op0 != d->op1)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
			   || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  if (d->testing_p)
		    return true;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),
					 perm, 4, false);
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V8SFmode)
	    vmode = V8SImode;

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (d->op0 != d->op1)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  if (vmode == V8SImode && d->vmode == V8SFmode)
    {
      vmode = V8SFmode;
      vperm = gen_lowpart (vmode, vperm);
    }

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->op0 == d->op1)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, vperm, op0));
      else
	emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case V32QImode:
	      gen = gen_avx2_pbroadcastv32qi_1;
	      break;
	    case V16HImode:
	      gen = gen_avx2_pbroadcastv16hi_1;
	      break;
	    case V8SImode:
	      gen = gen_avx2_pbroadcastv8si_1;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    case V8SFmode:
	      gen = gen_avx2_vec_dupv8sf_1;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default:
	      break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, d->op0));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				  d->testing_p))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				      d->testing_p))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
			      d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
				  d->testing_p))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->op0 == d->op1)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->op0 == d->op1.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
	 for interleave high.  If the elements are from mis-matched halves, we
	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->op0 == d->op1);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->op0 == d->op1)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->op0 == d->op1)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
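
/* Hedged sketch (not GCC code) of the membership test used above: record
   every source element the permutation touches in one bitmask, then ask
   whether all of them fall inside two chosen halves (H_A | H_B).  Names
   are hypothetical.  */
static int
perm_uses_only_halves_sketch (const unsigned char *perm, unsigned nelt,
			      unsigned long long h_a, unsigned long long h_b)
{
  unsigned long long contents = 0;
  unsigned i;
  for (i = 0; i < nelt; ++i)
    contents |= 1ULL << perm[i];
  return (contents & ~(h_a | h_b)) == 0;
}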
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->op0 == d->op1))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  if (d->op0 == d->op1)
    dsecond.op1 = gen_reg_rtx (d->vmode);
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	{
	  start_sequence ();
	  ok = expand_vec_perm_1 (&dsecond);
	  end_sequence ();
	}
      else
	ok = false;

      if (ok)
	{
	  if (d->testing_p)
	    return true;

	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  if (d->op0 == d->op1)
	    dfirst.target = dsecond.op1;
	  else
	    dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  dsecond.op1 = dfirst.target;
	  if (perm >= 16)
	    dsecond.op0 = dfirst.op1;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For d->op0 == d->op1 the only useful vperm2f128 permutation
	 is 0x10.  */
      if (d->op0 == d->op1)
	return false;
    }

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || d->op0 != d->op1)
    return false;

  dfirst = *d;
  dsecond = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
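/* Illustrative sketch (not from the GCC sources): the two byte masks the
   function above feeds to pshufb, shown for the eltsz == 1 (V16QImode)
   case on plain arrays.  Elements that live in the other operand get
   -128 (bit 7 set) so pshufb writes zero there, and the final ior of the
   two half-results yields the full two-operand permutation.
   Hypothetical standalone code.  */

static void
pshufb2_masks_sketch (const unsigned char perm[16],
		      signed char mask0[16], signed char mask1[16])
{
  unsigned i;

  for (i = 0; i < 16; i++)
    {
      unsigned which = perm[i] >= 16;  /* Element comes from op1?  */
      mask0[i] = which ? -128 : (signed char) perm[i];
      mask1[i] = which ? (signed char) (perm[i] - 16) : -128;
    }
}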
/* Implement arbitrary permutation of one V32QImode or V16HImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 != d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     The other mask has non-negative elements where an element is
     requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode or V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));

  return true;
}
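/* Illustrative sketch (not from the GCC sources): the two V32QImode
   vpshufb masks documented in the comment above, built for the
   extract-even case (d->perm[i] == 2*i) on plain arrays.  The xor with
   24 swaps the second and third eight-byte quarters so that, after the
   final { 0, 2, 1, 3 } vpermq, the bytes land in order.  Hypothetical
   standalone code.  */

static void
extract_even_masks_sketch (signed char mask0[32], signed char mask1[32])
{
  unsigned i;

  for (i = 0; i < 32; i++)
    {
      unsigned e = (2 * i) & 15;      /* Byte index within a lane.  */
      unsigned which = 2 * i >= 32;   /* Byte comes from op1?  */
      unsigned xorv = (i >= 8 && i < 24) ? 24 : 0;

      (which ? mask1 : mask0)[i ^ xorv] = (signed char) e;
      (which ? mask0 : mask1)[i ^ xorv] = -128;
    }
}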
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_lowpart (V4DFmode, d->target);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_lowpart (V8SFmode, d->target);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
					   gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
					  gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
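/* Illustrative sketch (not from the GCC sources): the pattern test used
   above, on a plain index array.  A permutation is extract-even or
   extract-odd iff perm[i] == 2*i + odd for all i, where odd is perm[0]
   (0 or 1).  Hypothetical standalone code.  */

static int
is_even_odd_perm_sketch (const unsigned char *perm, unsigned nelt,
			 unsigned *odd_out)
{
  unsigned i, odd = perm[0];

  if (odd != 0 && odd != 1)
    return 0;
  for (i = 1; i < nelt; ++i)
    if (perm[i] != 2 * i + odd)
      return 0;

  *odd_out = odd;
  return 1;
}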
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  rtx dest;
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
			   d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
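/* Illustrative sketch (not from the GCC sources): one round of the
   widening performed by the do-while loop above, on plain byte arrays.
   Interleaving a vector with itself duplicates each element, doubling
   the width of the unit that holds the broadcast value; two such rounds
   turn a byte element into a 32-bit one, after which a single pshufd
   finishes the broadcast.  Hypothetical standalone code for the
   low-half (punpcklbw-style) case.  */

static void
interleave_low_self_sketch (const unsigned char v[16], unsigned char out[16])
{
  unsigned i;

  for (i = 0; i < 8; i++)
    {
      out[2 * i] = v[i];       /* Each low-half byte appears twice...  */
      out[2 * i + 1] = v[i];   /* ...widening the element it sits in.  */
    }
}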
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode or V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     The other mask has non-negative elements where an element is
     requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly...  */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      break;
    }

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with d.op0 == d.op1.  If that didn't work,
     retry with d.op0 != d.op1 as that is what testing has been done with.  */
  if (which == 3 && d.op0 == d.op1)
    {
      rtx seq;
      bool ok;

      memcpy (d.perm, perm, sizeof (perm));
      d.op1 = gen_reg_rtx (d.vmode);
      start_sequence ();
      ok = ix86_expand_vec_perm_const_1 (&d);
      seq = get_insns ();
      end_sequence ();
      if (ok)
	{
	  emit_move_insn (d.op1, d.op0);
	  emit_insn (seq);
	  return true;
	}
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret, one_vec;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  one_vec = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	dst = gen_lowpart (dstmode, dst);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
	return true;
      }

    default:
      return false;
    }
}
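/* Illustrative sketch (not from the GCC sources): the mapping from the
   bit position operand to the pinsr element index used above.  After
   folding any SUBREG byte offset into POS, the element index is simply
   POS divided by the element width in bits.  Hypothetical standalone
   code.  */

static unsigned
pinsr_elt_index_sketch (unsigned pos_bits, unsigned size_bits)
{
  /* E.g. inserting a 16-bit value at bit 48 targets element 3 of V8HI.  */
  return pos_bits / size_bits;
}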
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

/* A large count used where a group should never constrain a window.  */
#define BIG 100
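/* Illustrative sketch (not from the GCC sources): how the immediate
   limits above combine.  A window may carry at most MAX_IMM_SIZE (128)
   bits of immediates, e.g. four 32-bit or two 64-bit values, and the
   32/64-bit counters additionally bound each other.  Hypothetical
   standalone check, much simplified from count_num_restricted below.  */

static int
imm_fits_window_sketch (int win_imm32, int win_imm64,
			int new_imm32, int new_imm64)
{
  int bits = (win_imm32 + new_imm32) * 32 + (win_imm64 + new_imm64) * 64;

  if (bits > MAX_IMM_SIZE)
    return 0;
  if (win_imm32 + new_imm32 > MAX_IMM_32)
    return 0;
  if (win_imm64 + new_imm64 > MAX_IMM_64)
    return 0;
  return 1;
}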
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   with each other.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}

/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not full
   dispatch windows of DISPATCH_WINDOW_SIZE bytes.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do the end-basic-block process.  */
      process_end_window ();
      return;
    }
}
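/* Illustrative sketch (not from the GCC sources): the window-rollover
   decision the function above implements.  Window 0 takes instructions
   until MAX_INSN uops are scheduled; window 1 may then be used until
   the combined byte size of both windows reaches 32, or would reach 48
   with the new instruction, at which point processing restarts at
   window 0.  Hypothetical standalone model.  */

static int
next_window_sketch (int window_num, int w0_bytes, int w1_bytes,
		    int insn_bytes, int num_uops, int insn_uops)
{
  if (window_num == 0 && num_uops + insn_uops > MAX_INSN)
    return 1;                      /* Spill into window 1.  */
  if (window_num == 1
      && (w0_bytes + w1_bytes == 32
	  || w0_bytes + w1_bytes + insn_bytes >= 48))
    return 0;                      /* Both windows full: start over.  */
  return window_num;
}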
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}

/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
38898 /* Initialize the GCC target structure. */
38899 #undef TARGET_RETURN_IN_MEMORY
38900 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
38902 #undef TARGET_LEGITIMIZE_ADDRESS
38903 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
38905 #undef TARGET_ATTRIBUTE_TABLE
38906 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
38907 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38908 # undef TARGET_MERGE_DECL_ATTRIBUTES
38909 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
38912 #undef TARGET_COMP_TYPE_ATTRIBUTES
38913 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
38915 #undef TARGET_INIT_BUILTINS
38916 #define TARGET_INIT_BUILTINS ix86_init_builtins
38917 #undef TARGET_BUILTIN_DECL
38918 #define TARGET_BUILTIN_DECL ix86_builtin_decl
38919 #undef TARGET_EXPAND_BUILTIN
38920 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
38922 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
38923 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
38924 ix86_builtin_vectorized_function
38926 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
38927 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
38929 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
38930 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
38932 #undef TARGET_VECTORIZE_BUILTIN_GATHER
38933 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
38935 #undef TARGET_BUILTIN_RECIPROCAL
38936 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
38938 #undef TARGET_ASM_FUNCTION_EPILOGUE
38939 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
38941 #undef TARGET_ENCODE_SECTION_INFO
38942 #ifndef SUBTARGET_ENCODE_SECTION_INFO
38943 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
38945 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
38948 #undef TARGET_ASM_OPEN_PAREN
38949 #define TARGET_ASM_OPEN_PAREN ""
38950 #undef TARGET_ASM_CLOSE_PAREN
38951 #define TARGET_ASM_CLOSE_PAREN ""
38953 #undef TARGET_ASM_BYTE_OP
38954 #define TARGET_ASM_BYTE_OP ASM_BYTE
38956 #undef TARGET_ASM_ALIGNED_HI_OP
38957 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
38958 #undef TARGET_ASM_ALIGNED_SI_OP
38959 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
38961 #undef TARGET_ASM_ALIGNED_DI_OP
38962 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
38965 #undef TARGET_PROFILE_BEFORE_PROLOGUE
38966 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
38968 #undef TARGET_ASM_UNALIGNED_HI_OP
38969 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
38970 #undef TARGET_ASM_UNALIGNED_SI_OP
38971 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
38972 #undef TARGET_ASM_UNALIGNED_DI_OP
38973 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
38975 #undef TARGET_PRINT_OPERAND
38976 #define TARGET_PRINT_OPERAND ix86_print_operand
38977 #undef TARGET_PRINT_OPERAND_ADDRESS
38978 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
38979 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
38980 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
38981 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
38982 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
38984 #undef TARGET_SCHED_INIT_GLOBAL
38985 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
38986 #undef TARGET_SCHED_ADJUST_COST
38987 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
38988 #undef TARGET_SCHED_ISSUE_RATE
38989 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
38990 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
38991 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
38992 ia32_multipass_dfa_lookahead
38994 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
38995 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
38998 #undef TARGET_HAVE_TLS
38999 #define TARGET_HAVE_TLS true
39001 #undef TARGET_CANNOT_FORCE_CONST_MEM
39002 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
39003 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
39004 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
39006 #undef TARGET_DELEGITIMIZE_ADDRESS
39007 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
39009 #undef TARGET_MS_BITFIELD_LAYOUT_P
39010 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
39013 #undef TARGET_BINDS_LOCAL_P
39014 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
39016 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39017 #undef TARGET_BINDS_LOCAL_P
39018 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
39021 #undef TARGET_ASM_OUTPUT_MI_THUNK
39022 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
39023 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
39024 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
39026 #undef TARGET_ASM_FILE_START
39027 #define TARGET_ASM_FILE_START x86_file_start
39029 #undef TARGET_OPTION_OVERRIDE
39030 #define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
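
/* va_list construction and varargs expansion.  */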
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
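
/* Argument passing and calling-convention hooks.  */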
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
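
/* Vectorizer cost model and preferred SIMD modes.  */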
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
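
/* Per-function target options, attribute ((target)) handling, and
   address/constant legitimacy.  */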
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
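
/* Build the target vector.  TARGET_INITIALIZER expands to a full
   struct gcc_target initializer, picking up the TARGET_* overrides
   defined above and the documented defaults for every hook left
   untouched.  */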
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"