/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
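/* BLOCK_INFO piggy-backs on the basic_block aux field: the vzeroupper
   pass below allocates it with alloc_aux_for_blocks (sizeof (struct
   block_info_def)) and releases it with free_aux_for_blocks, so the
   cast is only valid between those two calls.  */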
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
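/* check_avx256_stores has the callback signature expected by
   note_stores (pat, fn, data): it is invoked once per destination
   stored in PAT, and flips *DATA to "used" when either the destination
   or, for a SET, the source is a 256bit AVX register.  */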
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
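/* The pending/worklist pair above is a conventional iterative data-flow
   driver: blocks are keyed by reverse completion order so predecessors
   tend to be scanned first, successors whose entry state may still
   change are queued for the next round via the "pending" heap, and
   iteration stops as soon as a round finishes without setting
   rescan_vzeroupper_p.  */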
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
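/* For instance, the mult and divide entries of the cost tables below
   are selected as cost->mult_init[MODE_INDEX (mode)] and
   cost->divide[MODE_INDEX (mode)]; any mode other than QI/HI/SI/DImode
   falls into the trailing "other" slot (index 4).  Field names follow
   struct processor_costs as declared in i386.h.  */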
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
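/* Worked example: given COSTS_N_INSNS (N) == (N) * 4, an add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. the size tables
   charge one instruction-equivalent of cost per two bytes of encoding,
   keeping them comparable with the speed tables below.  */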
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
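/* How to read the stringop tables below: each stringop_algs
   initializer is {algorithm for unknown size, {{max, algorithm}...}},
   where a max of -1 ends the list and covers all larger sizes.  So
   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}} means: rep
   movsl for known sizes up to 256 bytes and a library call otherwise.
   The second member of each pair is the 64bit variant;
   DUMMY_STRINGOP_ALGS marks it unused for 32bit-only tunings.  */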
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache  */
  0,                            /* size of l2 cache  */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache  */
  0,                            /* size of l2 cache  */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache  */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache  */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache  */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to a perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
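/* Illustrative sketch (editor's note, not part of the original file):
   the cost tables are consumed through the ix86_cost pointer, so a cost
   query in the rtx-cost hooks is just a field load.  The field names
   below assume the struct processor_costs layout declared in i386.h.  */
#if 0
  int add_cost = ix86_cost->add;	  /* COSTS_N_INSNS (1) on most CPUs */
  int div_qi_cost = ix86_cost->divide[0]; /* QImode divide/mod cost */
#endif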
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER	(m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
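/* Illustrative sketch (editor's note, not part of the original file):
   each m_* macro above is one bit per processor, so a tuning-table entry
   is tested by shifting the active processor into a mask, roughly as the
   option-override code does when filling ix86_tune_features.  An entry
   written as ~(m_PENT | ...) enables the feature for every processor
   except those listed.  */
#if 0
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  unsigned int i;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i]
      = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#endif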
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,
  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,
  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
     on 16-bit immediate moves into memory on Core2 and Corei7.  */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),
  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,
  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),
  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of
     moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,
  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM
};
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for
     pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
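/* Illustrative sketch (editor's note, not part of the original file):
   these static masks gate target flags once the tuning mask is known,
   roughly as done in the option-override code when AVX is enabled.  */
#if 0
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
#endif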
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
2261 /* The "default" register map used in 32bit mode. */
2263 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2274 /* The "default" register map used in 64bit mode. */
2276 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2278 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2279 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2280 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2281 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2282 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2283 8,9,10,11,12,13,14,15, /* extended integer registers */
2284 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
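/* Illustrative sketch (editor's note, not part of the original file):
   each map translates a GCC hard register number into the number the
   debug-info consumer expects, e.g. under the SVR4 numbering above.  */
#if 0
  /* %esi is GCC regno 4 but DWARF regno 6 under the SVR4 numbering.  */
  gcc_assert (svr4_dbx_register_map[4] == 6);
  /* The flags register (GCC regno 17) maps to DWARF regno 9.  */
  gcc_assert (svr4_dbx_register_map[17] == 9);
#endif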
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
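/* Illustrative sketch (editor's note, not part of the original file):
   the tables above encode the calling conventions, so the n-th integer
   argument register differs between the SysV and MS ABIs.  */
#if 0
  int sysv_arg0 = x86_64_int_parameter_registers[0];	    /* %rdi (DI_REG) */
  int ms_arg0 = x86_64_ms_abi_int_parameter_registers[0];   /* %rcx (CX_REG) */
#endif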
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]
					<- regs_save_offset
   [padding0]

   [saved SSE regs]
					<- sse_regs_save_offset
   [padding1]		|
			|		<- FRAME_POINTER
   [va_arg registers]	|
			|
   [frame]		|
			|
   [padding2]		| = to_allocate
					<- sp + stack_pointer_offset  */

struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};
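/* Illustrative sketch (editor's note, not part of the original file):
   processor_target_table supplies both the active cost table and the
   default alignments, applied roughly like this during option override
   when the user gave no explicit -falign-* values.  */
#if 0
  ix86_cost = processor_target_table[ix86_tune].cost;
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
#endif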
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options match first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mhle",		OPTION_MASK_ISA_HLE },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
  };
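  /* Illustrative note (editor's addition, not part of the original file):
     because isa_opts is ordered newest-first and the printing loop below
     clears each matched mask bit, an ISA word containing the whole SSE
     family is rendered in descending order, e.g.
	"-msse4.2 -msse4.1 -mssse3 -msse3 -msse2 -msse"
     rather than scattering the implied options.  */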
  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };
  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;
  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
		| OPTION_MASK_ABI_64
		| OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }
  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;
	}

      if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	{
	  *ptr++ = '\n';
	  line_len = 0;
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
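/* Illustrative usage sketch (editor's note, not part of the original
   file); the exact output depends on the masks passed in.  */
#if 0
  char *s = ix86_target_string (ix86_isa_flags, target_flags,
				"corei7", "generic", FPMATH_SSE, false);
  /* Yields something like
     "-march=corei7 -mtune=generic -m64 ... -mfpmath=sse",
     all on one line since add_nl_p is false.  */
  free (s);
#endif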
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  Note: for x86 the "hotfix" case is sorried.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
/* if this reaches 64, need to widen struct pta flags below */
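  /* Illustrative sketch (editor's note, not part of the original file):
     the PTA_* bits compose one capability set per table entry and are
     tested one feature at a time, as the -march loop below does.  */
#if 0
  unsigned HOST_WIDE_INT pta = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2;
  if (pta & PTA_SSE2)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE2;	/* -march implies -msse2 */
#endif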
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2970 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2971 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2972 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2973 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2974 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2975 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2976 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2977 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2978 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2979 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2980 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2981 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2982 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2984 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2986 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2987 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2988 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2989 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2990 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2991 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2992 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2993 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2994 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2995 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2996 | PTA_CX16
| PTA_NO_SAHF
},
2997 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2998 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2999 | PTA_SSSE3
| PTA_CX16
},
3000 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3001 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3002 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
3003 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3004 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3005 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3006 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
3007 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
3008 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3009 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3010 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3011 | PTA_RDRND
| PTA_F16C
},
3012 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
3013 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3014 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3015 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3016 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3017 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
},
3018 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3019 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3020 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3021 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3022 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3023 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3024 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3025 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3026 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3027 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3028 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3029 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3030 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3031 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3032 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3033 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3034 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3035 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3036 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3037 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3038 {"k8", PROCESSOR_K8
, CPU_K8
,
3039 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3040 | PTA_SSE2
| PTA_NO_SAHF
},
3041 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3042 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3043 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3044 {"opteron", PROCESSOR_K8
, CPU_K8
,
3045 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3046 | PTA_SSE2
| PTA_NO_SAHF
},
3047 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3048 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3049 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3050 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3051 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3052 | PTA_SSE2
| PTA_NO_SAHF
},
3053 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3054 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3055 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3056 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3057 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3058 | PTA_SSE2
| PTA_NO_SAHF
},
3059 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3060 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3061 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3062 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3063 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3064 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3065 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3066 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3067 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3068 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3069 | PTA_XOP
| PTA_LWP
},
3070 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3071 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3072 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3073 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3074 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3076 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3077 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3078 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3079 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3080 PTA_HLE
/* flags are only used for -march switch. */ },
3081 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3083 | PTA_HLE
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;		/* option name */
      unsigned int mask;		/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",	     RECIP_MASK_ALL },
      { "none",	     RECIP_MASK_NONE },
      { "div",	     RECIP_MASK_DIV },
      { "sqrt",	     RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
    ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32)
	ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64.  */
      if (TARGET_LP64)
	ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif

  if (TARGET_X32)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_LP64)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
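
  /* Illustrative note (not from the original sources): the loop above is
     why a bare -march=<cpu> also turns on that CPU's ISA extensions.
     Assuming the alias table marks corei7 with PTA_SSE4_2, a command line
     such as

	 gcc -m32 -march=corei7 ...

     behaves as if -msse4.2 (and the earlier SSE levels) had been given,
     except where the user set an ISA flag explicitly; in that case the
     ix86_isa_flags_explicit test above leaves the user's choice alone.  */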
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
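
  /* Illustrative note (not from the original sources): ix86_tune_features[]
     ends up as a per-feature boolean table, so code elsewhere can test a
     tuning knob with a plain array lookup, roughly:

	 if (ix86_tune_features[X86_TUNE_USE_LEAVE])
	   ...prefer "leave" in epilogues...

     The mask trick works because each initial_ix86_tune_features[] entry
     is a bitmask indexed by processor, and ix86_tune_mask selects the bit
     for the processor being tuned for.  */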
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
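
  /* Worked example (not from the original sources): the option argument is
     a log2 value, so -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
     stack.  With the ranges above, 32-bit code accepts 2..12 (4 bytes up
     to 4096 bytes), 64-bit code requires at least 4 (16 bytes), and SEH
     targets allow nothing larger than 4.  */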
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
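
  /* Illustrative note (not from the original sources): these ix86_gen_*
     hooks let mode-independent code emit the right pattern without
     re-testing Pmode everywhere.  A caller can simply write, e.g.:

	 emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-4)));

     and get the DImode adder on 64-bit-pointer targets or the SImode one
     on 32-bit targets.  */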
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  /* When not optimizing for size, enable vzeroupper optimization for
     TARGET_AVX with -fexpensive-optimizations and split 32-byte
     AVX unaligned load/store.  */
  if (TARGET_AVX)
    {
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
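
  /* Usage sketch (not from the original sources): the parser above accepts
     a comma-separated list where a leading '!' inverts one item, so a
     command line such as

	 gcc -mrecip=all,!sqrt ...

     would set every RECIP_MASK_* bit except the square-root ones.  Bits
     touched here are also recorded in recip_mask_explicit, so the blanket
     -mrecip/-mno-recip handling just below cannot override them.  */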
  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
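
/* Illustrative note (not from the original sources): one visible effect of
   the MS_ABI block above is that %rsi/%rdi and xmm6-xmm15 become
   callee-saved when compiling 64-bit Windows code, whereas the SysV ABI
   treats %rsi/%rdi and all xmm registers as call-clobbered.  That
   asymmetry is the same one ix86_function_ok_for_sibcall later uses to
   reject sibcalls from MS-ABI functions into SysV-ABI functions.  */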
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;
      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}
      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
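
/* Usage sketch (not from the original sources): the strings parsed above
   come from declarations such as

       __attribute__((target("sse4.2,no-avx,arch=core2")))
       int foo (int x);

   Each comma-separated item is matched against attrs[]; a "no-" prefix
   flips the sense for ISA and flag options, while "arch="/"tune=" values
   are stashed in p_strings[] for ix86_valid_target_attribute_tree
   below.  */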
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* The callee's isa options should be a subset of the caller's, i.e.
	 an SSE4 function can inline an SSE2 function, but an SSE2 function
	 can't inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
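
/* Illustrative note (not from the original sources): given

       __attribute__((target("sse4.2"))) int callee (void);
       __attribute__((target("avx")))    int caller (void);

   the subset test above permits inlining callee into caller only if every
   ISA bit of the callee is also set for the caller; enabling AVX implies
   SSE4.2, so this pair is inlinable, but the reverse direction is not.  */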
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
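
/* Illustrative note (not from the original sources): this predicate only
   fires for -mcmodel=medium, where objects larger than the
   -mlarge-data-threshold= value (ix86_section_threshold) are placed in
   .ldata/.lbss by the section hooks below, so they may live outside the
   small-data range.  For example, a 1 MiB static array goes to .ldata
   under the usual default threshold of 64 KiB, while a small scalar
   stays in .data.  */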
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *x86_64_elf_select_section (tree, int,
					   unsigned HOST_WIDE_INT)
  ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}

      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("regparm and thiscall attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
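
/* Illustrative note (not from the original sources): for a 32-bit method
   type under the MS ABI with no explicit convention attribute, the fall
   through at the end yields IX86_CALLCVT_THISCALL, i.e. `this` arrives in
   %ecx.  An ordinary unadorned C function instead reports
   IX86_CALLCVT_CDECL, unless -mrtd makes stdcall the default for
   non-variadic functions.  */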
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
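
/* Worked example, assuming the rules above: a 32-bit function declared

       int __attribute__ ((stdcall)) f (int a, int b);

   has fixed arguments and the stdcall convention, so this hook returns
   SIZE (8 here) and the callee pops its arguments with "ret $8"; the
   same function declared with a trailing ellipsis would return 0 and
   leave the popping to the caller.  */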
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function context
   since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum
			&& !warnedavx
			&& cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
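
/* Illustrative example of the merge rules, following the psABI: for

       struct s { int i; float f; };

   both fields share one eightbyte.  The float is not 64-bit aligned,
   so its half classifies as X86_64_SSE_CLASS, and merging that with
   the int's X86_64_INTEGERSI_CLASS yields X86_64_INTEGER_CLASS under
   rule #4; the whole struct therefore travels in a single integer
   register.  */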
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
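
/* Worked example, following the psABI rules implemented above: for

       struct s { double d; int i; };

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGERSI_CLASS, i.e. the double travels in an
   SSE register and the int in an integer register.  */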
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */
static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
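
/* Usage sketch (illustrative): for the struct in the previous example,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1, letting the
   callers below check both counts against the registers remaining in
   CUM before committing to register passing.  */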
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we
	     don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
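
/* Illustrative result (not from the original sources): for the
   struct { double d; int i; } example above, construct_container builds
   roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		  (expr_list (reg:SI di) (const_int 8))])

   i.e. one EXPR_LIST entry per eightbyte, each pairing a hard register
   with its byte offset inside the argument.  */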
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
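
/* Illustrative walk-through (assuming the psABI rules above): advancing
   over a "double" argument on 64-bit SysV consumes one SSE slot
   (cum->sse_nregs--, cum->sse_regno++); advancing over an argument that
   must live on the stack instead rounds cum->words up to the argument's
   alignment and then adds its size in words.  */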
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers [cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
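
/* Illustrative example for the MS ABI path above: a named double in
   argument slot 1 comes back as (reg:DF xmm1), while the same argument
   in a varargs call yields a PARALLEL naming both %xmm1 and %rdx, so
   the caller stores the value into the integer register as well.  */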
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
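
/* Example, per the Windows x64 convention encoded above: a 12-byte
   struct is not of size 1, 2, 4 or 8, so it is passed by reference;
   an 8-byte struct travels by value in a single integer register.  */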
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
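
/* Example, mirroring ix86_pass_by_reference above: under the MS ABI a
   16-byte __m128 comes back in %xmm0, an 8-byte struct in %rax, and a
   12-byte struct through a hidden memory pointer.  */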
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
7572 /* Returns the calling convention specific va_list date type.
7573 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7576 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7578 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7580 /* For i386 we use plain pointer to argument area. */
7581 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7582 return build_pointer_type (char_type_node
);
7584 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7585 type_decl
= build_decl (BUILTINS_LOCATION
,
7586 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7588 f_gpr
= build_decl (BUILTINS_LOCATION
,
7589 FIELD_DECL
, get_identifier ("gp_offset"),
7590 unsigned_type_node
);
7591 f_fpr
= build_decl (BUILTINS_LOCATION
,
7592 FIELD_DECL
, get_identifier ("fp_offset"),
7593 unsigned_type_node
);
7594 f_ovf
= build_decl (BUILTINS_LOCATION
,
7595 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7597 f_sav
= build_decl (BUILTINS_LOCATION
,
7598 FIELD_DECL
, get_identifier ("reg_save_area"),
7601 va_list_gpr_counter_field
= f_gpr
;
7602 va_list_fpr_counter_field
= f_fpr
;
7604 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7605 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7606 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7607 DECL_FIELD_CONTEXT (f_sav
) = record
;
7609 TYPE_STUB_DECL (record
) = type_decl
;
7610 TYPE_NAME (record
) = type_decl
;
7611 TYPE_FIELDS (record
) = f_gpr
;
7612 DECL_CHAIN (f_gpr
) = f_fpr
;
7613 DECL_CHAIN (f_fpr
) = f_ovf
;
7614 DECL_CHAIN (f_ovf
) = f_sav
;
7616 layout_type (record
);
7618 /* The correct type is an array type of one element. */
7619 return build_array_type (record
, build_index_type (size_zero_node
));
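
/* The record built above corresponds to the psABI's

       typedef struct {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } va_list[1];

   layout (shown here for illustration only).  */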
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg, seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = get_insns ();
          end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
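/* Worked example (illustrative, not part of the original comments): for
   int f (int a, double b, ...) the named arguments consume one GPR and
   one SSE register, so crtl->args.info gives n_gpr == 1 and n_fpr == 1,
   and va_start stores

       gp_offset = 1 * 8                            =  8
       fp_offset = 1 * 16 + 8 * X86_64_REGPARM_MAX  = 64   (REGPARM_MAX == 6)

   i.e. the first va_arg GPR fetch starts at byte 8 of the register save
   area and the first SSE fetch at byte 64, just past the six GPR slots.  */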
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
        {
          container = NULL;
          break;
        }

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
                                       type, 0, X86_64_REGPARM_MAX,
                                       X86_64_SSE_REGPARM_MAX, intreg,
                                       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
                  if (mode == BLKmode)
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
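/* Worked example (illustrative, not part of the original sources): for
   va_arg (ap, int) we have needed_intregs == 1, so the fit test emitted
   above is "if (gp_offset >= (6 - 1 + 1) * 8) goto lab_false", i.e. a
   compare against 48.  gp_offset advances by 8 per GPR consumed, so the
   six register slots cover offsets 0..40 and anything at 48 or beyond
   falls through to the overflow area.  */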
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
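/* Summary of the return-value convention above (restated here for
   reference; values 3..7 index the table in order fldlg2, fldln2,
   fldl2e, fldl2t, fldpi):

       0  not a special constant      5  log2(e)   -> fldl2e
       1  +0.0  -> fldz               6  log2(10)  -> fldl2t
       2  +1.0  -> fld1               7  pi        -> fldpi
       3  log10(2) -> fldlg2          8  -0.0      -> fldz; fchs
       4  ln(2)    -> fldln2          9  -1.0      -> fld1; fchs  */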
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if x is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
        if (TARGET_SSE2)
          return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
        if (TARGET_AVX2)
          return 2;
      default:
        break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          return "%vpxor\t%0, %d0";
        case MODE_V2DF:
          return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          return "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";

        default:
          break;
        }

    case 2:
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
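/* For example (illustrative): with USE_HIDDEN_LINKONCE the thunk that
   loads the PC into %ebx is named "__x86.get_pc_thunk.bx"; without it,
   an internal label built from "LPR" and the register number is used
   instead.  */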
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
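/* The emitted thunk body is tiny; for %ebx it is (illustrative):

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx    # return address == address after call
               ret

   A caller materializes its PC with "call __x86.get_pc_thunk.bx"
   followed by an add of _GLOBAL_OFFSET_TABLE_, as output_set_got
   below shows.  */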
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
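/* Typical 32-bit PIC sequence produced here (illustrative):

               call    __x86.get_pc_thunk.bx
               addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   while the non-PIC variant loads the GOT symbol through an internal
   label instead of using the thunk call.  */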
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
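/* Illustrative example (exact values depend on the layout computed by
   ix86_compute_frame_layout below): on 32-bit with a saved %ebp,
   hard_frame_pointer_offset is 8 (4 bytes of return address plus the
   saved %ebp), so eliminating the argument pointer to the hard frame
   pointer yields 8, while eliminating it to the stack pointer must also
   account for every register save and local slot below that point.  */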
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* 64-bit MS ABI seems to require stack alignment to be always 16 except for
     function prologues and leaf.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
        }
    }
}
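/* Worked example (illustrative, 64-bit, no realignment): a non-leaf
   function that saves %rbp plus two callee-saved registers and has 40
   bytes of locals walks the offsets roughly as

       offset =  8   return address
       offset = 16   saved %rbp          -> hard_frame_pointer_offset = 16
       offset = 32   2 regs * 8 bytes    -> reg_save_offset = 32
       offset = 32                       -> sse_reg_save_offset (no SSE saves)
       offset = 80   32 + 40 locals, rounded up to 16-byte alignment
                                         -> stack_pointer_offset = 80

   so the prologue must allocate 80 - 32 = 48 bytes beyond its pushes.  */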
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
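/* Examples of the returned byte counts (illustrative): 0(%eax) encodes
   with no displacement -> 0; 0(%ebp) still needs a disp8 -> 1; 8(%esp)
   needs disp8 plus a SIB byte -> 2; 0x100(%eax) needs a disp32 -> 4;
   0x100(%esp) needs disp32 plus SIB -> 5.  */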
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
                            m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
      if (style)
        tmp = gen_rtx_REG (DImode, R11_REG);
      else
        {
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
        }
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
        add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
        {
          rtx r = gen_rtx_PLUS (Pmode, src, offset);
          r = gen_rtx_SET (VOIDmode, dest, r);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
        }
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
        }
      else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
          ooffset = 0;
        }
      else
        {
          /* Else there are two possibilities: SP itself, which we set
             up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
             taken care of by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);
        }

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used in begin of body, so it must not be
        1. parameter passing register.
        2. GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a
         static chain.  Since a function with a tail call may use any
         caller-saved registers in its epilogue, DRAP must not use a
         caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a
         static chain.  Since a function with a tail call may use any
         caller-saved registers in its epilogue, DRAP must not use a
         caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
        {
          unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
          if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
            return CX_REG;
        }
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
         for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
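/* Illustrative note (not from the original): with the default
   STACK_CHECK_PROBE_INTERVAL_EXP of 12 this probes every 4096 bytes,
   i.e. once per smallest page, so the loops below touch one word per
   page of newly allocated stack.  */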
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          if (first_probe)
            {
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
            }
          else
            adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
                                                 -adjust)));
          emit_stack_probe (stack_pointer_rtx);
        }

      if (first_probe)
        adjust = size + PROBE_INTERVAL + dope;
      else
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));


      /* Step 3: the loop

         while (SP != LAST_ADDR)
           {
             SP = SP + PROBE_INTERVAL
             probe at SP
           }

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);
        }

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (Pmode, stack_pointer_rtx,
                                      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
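/* The emitted loop looks like this (illustrative, 32-bit AT&T syntax,
   assuming a 4096-byte PROBE_INTERVAL):

       .LPSRL0:
               cmpl    %eax, %esp      # reached LAST_ADDR yet?
               je      .LPSRE0
               subl    $4096, %esp     # advance SP by one interval
               orl     $0, (%esp)      # touch the newly exposed page
               jmp     .LPSRL0
       .LPSRE0:                                                        */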
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                         -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode,
                                         gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (current_function_is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 update it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && current_function_is_leaf
      && flag_omit_frame_pointer
      && current_function_sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	{
	  rtx insn;
	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		crtl->stack_realign_needed = stack_realign;
		crtl->stack_realign_finalized = true;
		return;
	      }
	}

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
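/* A worked example of the comparison above (all values are in bits, as with
   the other boundary variables; the particular numbers are illustrative,
   not taken from any specific target configuration): with a 32-bit incoming
   boundary and a 32-bit parm boundary, incoming_stack_boundary is 32.  If a
   leaf function then has a local needing 128-bit (16-byte) alignment,
   max_used_stack_slot_alignment is 128 and stack_realign comes out true.  */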
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);
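  /* A sketch of how this CFA state evolves on 32-bit, where
     INCOMING_FRAME_SP_OFFSET is one word (4 bytes):

	on entry:	     cfa_reg = %esp, cfa_offset = 4, sp_offset = 4
	after push %ebp:     cfa_reg = %esp, cfa_offset = 8, sp_offset = 8
	after mov %esp,%ebp: cfa_reg = %ebp, cfa_offset = 8, fp_offset = 8

     This mirrors the usual DWARF CFA bookkeeping for an ia32 frame pointer
     prologue; the exact numbers depend on the target word size.  */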
  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the profiling-before-
	 prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
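  /* For illustration, with a 16-byte alignment requirement and %ecx as the
     DRAP register (both the register choice and the alignment are
     assumptions here, not fixed), the block above emits roughly:

	leal	4(%esp), %ecx	# grab the argument pointer
	andl	$-16, %esp	# align the stack
	pushl	-4(%ecx)	# replicate the return address

     so that *(argp - 1) still yields the return address after the
     realignment.  */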
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }
  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }
  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }
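  /* An example of the rounding above: with align_bytes == 16, sp_offset == 12
     gives (12 + 16) & -16 == 16, while sp_offset == 16 gives 32 -- a full
     extra alignment unit is accounted for when the offset is already
     aligned, matching the frame layout computation referred to above.  */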
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }
  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }
  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}
      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
	{
	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_offset += allocate;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
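  /* For illustration, on a target where ix86_target_stack_probe () is true
     (e.g. Windows-style ABIs) the worker sequence above comes out roughly as:

	movl	$NNN, %eax		# NNN = bytes to allocate
	call	___chkstk_ms		# probe the pages (helper name varies
					# by ABI; illustrative only)
	subl	%eax, %esp		# the actual adjustment reuses %eax

     The probing helper touches each page in turn so the guard page is
     never skipped over by a large allocation.  */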
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }
  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, tmp_reg;

	      gcc_assert (Pmode == DImode);
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
					       pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));
  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out that stack
	 realignment isn't necessary; emit the prologue code to set up
	 DRAP without the stack realignment adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
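/* Putting the pieces together: for a small 32-bit function with a frame
   pointer and 24 bytes of locals, none of the special paths above fire and
   the emitted prologue is simply (illustrative values):

	pushl	%ebp
	movl	%esp, %ebp
	subl	$24, %esp

   The DRAP, realignment, probing and PIC paths only add to this skeleton
   when the corresponding conditions hold.  */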
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
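/* The bookkeeping above follows directly from the ISA: "leave" is
   equivalent to "movl %ebp, %esp; popl %ebp", so afterwards the stack
   pointer sits one word above the frame pointer save slot -- exactly
   sp_offset = fp_offset - UNITS_PER_WORD as recorded above.  */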
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (current_function_sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);
  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;
  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test are
	     set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!m->fs.sp_valid)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
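/* For illustration: with 8 bytes of callee-popped arguments the normal path
   above emits "ret $8", while a (pathological) 64K+ pop count on ia32
   instead produces roughly

	popl	%ecx		# return address
	addl	$N, %esp	# N = crtl->args.pops_args
	jmp	*%ecx

   matching the pop / adjust / indirect-jump sequence emitted above.  */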
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
				    stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;
  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));
  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
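/* An illustrative sketch of what this prologue expands to on x86_64
   GNU/Linux.  The TCB slot offset shown is an assumption here; the actual
   location is defined by libgcc's __morestack implementation (see
   libgcc/config/i386/morestack.S):

	cmpq	%fs:0x70, %rsp		# stack boundary in the TCB
	jae	.Lenough		# taken in the common case
	movq	$FRAME_SIZE, %r10
	movq	$ARGS_SIZE, %r11
	callq	__morestack
	retq				# consumed by __morestack
   .Lenough:
	...regular prologue...  */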
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = XEXP (addr, 0);

	  /* Adjust SUBREGs.  */
	  if (GET_CODE (addr) == SUBREG
	      && GET_MODE (SUBREG_REG (addr)) == SImode)
	    addr = SUBREG_REG (addr);
	  else if (GET_MODE (addr) == DImode)
	    addr = gen_rtx_SUBREG (SImode, addr, 0);
	  else if (GET_MODE (addr) != VOIDmode)
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case CONST:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */
) == SUBREG
11649 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11655 /* Address override works only on the (%reg) part of %fs:(%reg). */
11656 if (seg
!= SEG_DEFAULT
11657 && ((base
&& GET_MODE (base
) != word_mode
)
11658 || (index
&& GET_MODE (index
) != word_mode
)))
11661 /* Extract the integral value of scale. */
11664 if (!CONST_INT_P (scale_rtx
))
11666 scale
= INTVAL (scale_rtx
);
11669 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11670 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11672 /* Avoid useless 0 displacement. */
11673 if (disp
== const0_rtx
&& (base
|| index
))
11676 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11677 if (base_reg
&& index_reg
&& scale
== 1
11678 && (index_reg
== arg_pointer_rtx
11679 || index_reg
== frame_pointer_rtx
11680 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11683 tmp
= base
, base
= index
, index
= tmp
;
11684 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11687 /* Special case: %ebp cannot be encoded as a base without a displacement.
11691 && (base_reg
== hard_frame_pointer_rtx
11692 || base_reg
== frame_pointer_rtx
11693 || base_reg
== arg_pointer_rtx
11694 || (REG_P (base_reg
)
11695 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11696 || REGNO (base_reg
) == R13_REG
))))
11699 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11700 Avoid this by transforming to [%esi+0].
11701 Reload calls address legitimization without cfun defined, so we need
11702 to test cfun for being non-NULL. */
11703 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11704 && base_reg
&& !index_reg
&& !disp
11705 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11708 /* Special case: encode reg+reg instead of reg*2. */
11709 if (!base
&& index
&& scale
== 2)
11710 base
= index
, base_reg
= index_reg
, scale
= 1;
11712 /* Special case: scaling cannot be encoded without base or displacement. */
11713 if (!base
&& !disp
&& index
&& scale
!= 1)
11717 out
->index
= index
;
11719 out
->scale
= scale
;
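/* Usage sketch: decomposing the 32-bit address (%ebx + %esi*4 + 12),
   i.e. (plus (plus (mult %esi 4) %ebx) (const_int 12)), fills OUT with
   base == %ebx, index == %esi, scale == 4, disp == (const_int 12),
   seg == SEG_DEFAULT, and returns 1.  (The registers are chosen
   arbitrarily for the example.)  */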
/* Return cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have an AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
	      && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; they limited the
	 allowed distance of GOT tables.  We should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode, since it is always 64bit when used.
	 The ABI also specifies a 32bit relocation, but we don't produce
	 it in the small PIC model at all.  */
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
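
/* Example (illustrative): in 32bit PIC code the canonical displacement
   for a local symbol plus a byte offset is

     (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)
		  (const_int 4)))

   which assembles to foo@GOTOFF+4; the PLUS is stripped above (setting
   saw_plus) and the UNSPEC_GOTOFF case then accepts it.  */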

/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
		       (reg:DI 97))
	      (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating an invalid memory address RTX.

     The code below reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	{
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	{
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
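
/* Example (illustrative): for the rejected address shown in the comment
   above, only (reg:DI 97) fails the strict base-register check, so only
   that component is pushed for reload; the UNSPEC_TP thread-pointer part
   and the valid hard register are left intact.  */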

/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  /* Since a constant address in x32 is sign-extended to 64bit,
     we have to reject addresses from 0x80000000 to 0xffffffff.  */
  if (TARGET_X32
      && CONST_INT_P (addr)
      && INTVAL (addr) < 0)
    return false;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since they are always
	     64bit when used.  The ABI also specifies 32bit relocations,
	     but we don't produce them at all and use IP relative
	     addressing instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo (int i)
	       {
		 return *(&a + i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in the
	     case the output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
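
/* Design note (illustrative): GOT slots are filled in by the dynamic
   linker, never by compiled stores, so giving GOT loads their own alias
   set (and creating them with gen_const_mem below) lets the optimizers
   treat them as constant memory and CSE or hoist them freely.  */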

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used as the address of the data.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
	{
	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	    return legitimize_dllimport_symbol (addr, true);
	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	    {
	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	    }
	}

      /* For x64 PE-COFF there is no GOT table.  So we use address
	 directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new_rtx = legitimize_pic_address (XEXP (addr, 1),
						base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
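
/* Example (illustrative): the two 32bit result shapes described above are

     (plus pic_reg (const (unspec [(symbol_ref "s")] UNSPEC_GOTOFF)))

   for a local symbol computed relative to the GOT base, and

     (mem (plus pic_reg (const (unspec [(symbol_ref "g")] UNSPEC_GOT))))

   for a global symbol whose address is loaded from its GOT slot.  */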

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
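
/* Note (illustrative): the UNSPEC_TP reference built here is matched by
   the thread-pointer load patterns in i386.md and typically assembles to
   a segment-relative access such as %gs:0 (32bit) or %fs:0 (64bit).  */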

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}

/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

	      start_sequence ();
	      emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
							      caddr));
	      insns = get_insns ();
	      end_sequence ();

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

	      start_sequence ();
	      emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
								  caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
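
/* Summary (illustrative): the models above roughly correspond to these
   access sequences.  Global and local dynamic call __tls_get_addr (or
   use the descriptor scheme under TARGET_GNU2_TLS); initial exec loads
   the variable's offset from the GOT via @gottpoff and adds the thread
   pointer; local exec adds a link-time constant (@tpoff or @ntpoff)
   directly to the thread pointer, the cheapest form.  */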

/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
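
/* Example (illustrative): a reference to a dllimport symbol "foo"
   becomes a load through the import slot created above, e.g.

     (mem (symbol_ref "*__imp_foo"))

   so the access indirects through the PE import table (the exact
   prefix depends on user_label_prefix, as get_dllimport_decl shows).  */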

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
	}
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
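
/* Example (illustrative): the ASHIFT canonicalization above rewrites

     (plus (ashift (reg) (const_int 2)) (reg))

   into (plus (mult (reg) (const_int 4)) (reg)), the canonical form that
   ix86_decompose_address recognizes as a scaled-index address.  */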

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}

/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}

/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	      && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
	  || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
	return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	{
	  x = simplify_gen_subreg (GET_MODE (orig_x), x,
				   GET_MODE (x), 0);
	  if (x == NULL_RTX)
	    return orig_x;
	}
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      switch (mode)
	{
	case CCAmode:
	  suffix = "a";
	  break;
	case CCCmode:
	  suffix = "c";
	  break;
	case CCOmode:
	  suffix = "o";
	  break;
	case CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	}
      break;
    case NE:
      switch (mode)
	{
	case CCAmode:
	  suffix = "na";
	  break;
	case CCCmode:
	  suffix = "nc";
	  break;
	case CCOmode:
	  suffix = "no";
	  break;
	case CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
	suffix = "be";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
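
/* Example (illustrative): for (gt (reg) (const_int 0)) compared in
   CCGCmode this prints "g", so a template such as "set%C0\t%0" would
   emit "setg" for the signed greater-than condition.  */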

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
	      || (REGNO (x) != ARG_POINTER_REGNUM
		  && REGNO (x) != FRAME_POINTER_REGNUM
		  && REGNO (x) != FLAGS_REG
		  && REGNO (x) != FPSR_REG
		  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]".  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[REGNO (x)] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   q --  likewise, print the DImode name of the register.
   x --  likewise, print the V4SFmode name of the register.
   t --  likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */

void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'E':
	  /* Wrap address in an UNSPEC to declare special handling.  */
	  if (TARGET_64BIT)
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

	  output_address (x);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;

		case 2:
		  putc ('w', file);
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
		  putc ('q', file);
		  return;

		default:
		  output_operand_lossage
		    ("invalid operand size for operand code '%c'", code);
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning
	      (0, "non-integer operand used with operand code '%c'", code);
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;

		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;

		case 8:
		  putc ('l', file);
		  return;

		case 12:
		case 16:
		  putc ('t', file);
		  return;

		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage
		("invalid operand type used with operand code '%c'", code);
	      return;
	    }

	  output_operand_lossage
	    ("invalid operand size for operand code '%c'", code);
	  return;

	case 'd':
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	case 'p':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  if (TARGET_AVX)
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		  fputs ("eq", file);
		  break;
		case UNEQ:
		  fputs ("eq_us", file);
		  break;
		case LT:
		  fputs ("lt", file);
		  break;
		case UNLT:
		  fputs ("nge", file);
		  break;
		case LE:
		  fputs ("le", file);
		  break;
		case UNLE:
		  fputs ("ngt", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		  fputs ("neq", file);
		  break;
		case LTGT:
		  fputs ("neq_oq", file);
		  break;
		case GE:
		  fputs ("ge", file);
		  break;
		case UNGE:
		  fputs ("nlt", file);
		  break;
		case GT:
		  fputs ("gt", file);
		  break;
		case UNGT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  else
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		case UNEQ:
		  fputs ("eq", file);
		  break;
		case LT:
		case UNLT:
		  fputs ("lt", file);
		  break;
		case LE:
		case UNLE:
		  fputs ("le", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		case LTGT:
		  fputs ("neq", file);
		  break;
		case UNGE:
		case GE:
		  fputs ("nlt", file);
		  break;
		case UNGT:
		case GT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: gcc_unreachable ();
		}
	      putc ('.', file);
	    }
#endif
	  return;

	case 'C':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'C'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;

	case 'F':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'F'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;

	case 'f':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'f'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	case 'H':
	  if (!offsettable_memref_p (x))
	    {
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand "
				      "code 'H'");
	      return;
	    }
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case 'K':
	  gcc_assert (CONST_INT_P (x));

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
	  /* We do not want to print value of the operand.  */
	  return;

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    bool cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '@':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('%', file);

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	  else
	    fputs ("gs", file);
	  return;

	case '~':
	  putc (TARGET_AVX2 ? 'i' : 'f', file);
	  return;

	case '^':
	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
	  && GET_MODE (x) != BLKmode)
	{
	  const char *size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "TBYTE"; break;
	    case 16:
	      if (GET_MODE (x) == XFmode)
		size = "TBYTE";
	      else
		size = "XMMWORD";
	      break;
	    case 32: size = "YMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w', 'k',
	     'q' and 'x').  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && !CONST_INT_P (x))
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P' && code != 'p')
	{
	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
}
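
/* Explanatory note (added; not in the original source): these punctuation
   codes appear directly in machine-description templates.  For instance, a
   template along the lines of "vbroadcast%~128" would have "%~" expanded by
   the code above to 'i' when AVX2 is enabled and 'f' otherwise, and "%^"
   would emit the "addr32 " prefix on x32-style targets.  The template text
   here is an illustration inferred from the handlers above, not a quote
   from the .md files.  */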
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
				    tmp, GET_MODE (tmp), 0);
    }

  if (parts.index && GET_CODE (parts.index) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
				     tmp, GET_MODE (tmp), 0);
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (parts.seg != SEG_DEFAULT)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      if (parts.seg == SEG_FS || parts.seg == SEG_GS)
	fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      else
	gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names for zero-extended
	 addresses to force addr32 prefix.  */
      if (TARGET_64BIT
	  && (GET_CODE (addr) == ZERO_EXTEND
	      || GET_CODE (addr) == AND))
	{
	  gcc_assert (!code);
	  code = 'l';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
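
/* Example output (added; illustrative): for the initial-exec TLS model on
   x86-64, an UNSPEC_GOTNTPOFF reference to symbol "foo" is printed as
   "foo@gottpoff(%rip)" in AT&T syntax and "foo@gottpoff[rip]" in Intel
   syntax, matching the cases above.  */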
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
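
/* A minimal, self-contained sketch (added for exposition; not part of the
   original file) of the value-level effect of split_double_mode on a
   little-endian target: the low half lives at byte offset 0 and the high
   half at byte offset GET_MODE_SIZE (half_mode).  */
#if 0
static void
split_u64_sketch (unsigned long long v, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (v & 0xffffffffu);	/* half at offset 0 */
  *hi = (unsigned int) (v >> 32);		/* half at offset 4 */
}
#endif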
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
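
/* Note on the "{att|intel}" template syntax used above (explanatory
   addition): inside braces, the text before '|' is emitted for the AT&T
   dialect and the text after it for the Intel dialect.  So for an fadd
   with a dying stack operand, "p\t{%0, %2|%2, %0}" would come out roughly
   as "faddp %st, %st(1)" under AT&T and "faddp st(1), st" under Intel.
   The concrete mnemonics here are an illustration, not output copied from
   an actual build.  */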
/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
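
/* Background note (added): bits 10-11 of the x87 control word form the
   rounding-control field, which explains the masks used above:
     0x0000  round to nearest (even) -- the default
     0x0400  round down toward -inf  (floor)
     0x0800  round up toward +inf    (ceil)
     0x0c00  round toward zero       (trunc)
   Bit 5 (0x0020) masks the precision exception, which is what the
   I387_CW_MASK_PM case sets for nearbyint().  */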
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	{
	  if (unordered_p)
	    return "%vucomiss\t{%1, %0|%0, %1}";
	  else
	    return "%vcomiss\t{%1, %0|%0, %1}";
	}
      else
	{
	  if (unordered_p)
	    return "%vucomisd\t{%1, %0|%0, %1}";
	  else
	    return "%vcomisd\t{%1, %0|%0, %1}";
	}
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
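
/* Example of the mask encoding above (added): a popping unordered compare
   against a stack register, with the stack top dying, has eflags_p == 0,
   intmode == 0, unordered_p == 1 and stack_top_dies == 1, giving mask == 3
   and thus "fucomp%Z2\t%y2\n\tfnstsw\t%0".  */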
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
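
/* Illustrative result (added; not from the original source): with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD, a 32-byte unaligned load is emitted
   as two 16-byte halves concatenated back into a 256-bit register, which
   the assembler output typically renders as something like
	vmovups	(%rax), %xmm0
	vinsertf128	$1, 16(%rax), %ymm0, %ymm0
   instead of a single 32-byte vmovups.  */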
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps/movapd reg, reg
     else
       movss/movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     else if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg; movlps mem, reg; movhps mem+8, reg
     else
       movlps mem, reg; movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     else if (x86_sse_split_regs == true)
       movlpd mem, reg; movhpd mem+8, reg
     else
       movsd mem, reg; unpcklpd reg, reg  */
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX
      && GET_MODE_SIZE (mode) == 32)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  op0 = gen_lowpart (V32QImode, op0);
	  op1 = gen_lowpart (V32QImode, op1);
	  /* FALLTHRU */

	case MODE_VECTOR_FLOAT:
	  ix86_avx256_split_vector_move_misalign (op0, op1);
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_movdqu (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      /* We will eventually emit movups based on insn attributes.  */
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);

	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_movdqu (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    /* We will eventually emit movups based on insn attributes.  */
	    emit_insn (gen_sse2_movupd (op0, op1));
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
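
/* Illustrative expansion (added): pushing a value X this way produces a
   stack-pointer adjustment followed by a plain store rather than a real
   push instruction, roughly
	sub	$<size>, %esp
	mov...	X, (%esp)
   where <size> is GET_MODE_SIZE (mode).  The concrete mnemonics depend on
   the mode and target.  */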
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divimod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divimod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
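
/* Sketch of the generated code shape (added for exposition; labels and
   registers are schematic, the register allocator picks the real ones):
	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch
	je	.Lqimode
	...full-width [u]div...
	jmp	.Lend
   .Lqimode:
	...8-bit divb; quotient lands in AL, remainder in AH...
   .Lend:  */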
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
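
/* Intuition behind the heuristic above (added for exposition): on in-order
   AGU-limited cores such as Atom, an lea whose address inputs were just
   produced on an ALU stalls in the address-generation unit waiting for
   them.  So when the backward definition distance is small relative to the
   forward address-use distance, the splitter prefers a mov/add/shl
   sequence; when the inputs are old enough, the single lea wins.  */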
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 2;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;
    }

  /* Subtract the price of lea.  */
  split_cost -= 1;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
	parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
	parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, operands[0],
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	      tmp = parts.index;
	    }

	  ix86_emit_binop (PLUS, mode, operands[0], tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
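
/* Illustrative split (added; register choice is schematic): an AGU-hostile
   "lea 4(%rbx,%rcx,2), %rax" decomposes via the code above into
	mov	%rcx, %rax
	shl	$1, %rax
	add	%rbx, %rax
	add	$4, %rax
   which executes entirely on ALU ports.  */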
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
          && true_regnum (set_dest) == true_regnum (shift_count))
        return true;
    }

  return false;
}
/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
                                       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
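/* The exponent-bias trick above, restated in scalar C (an illustrative
   sketch assuming IEEE-754 doubles; the function name is hypothetical and
   the bit-level "pasting" is replaced by the equivalent arithmetic):  */
#if 0
static double
uns64_to_double_sketch (unsigned long long x)
{
  /* Each 32-bit half converts exactly; scaling hi by 2^32 is also exact
     (pure exponent shift), so only the final add rounds -- just like the
     subtract-biases-then-sum sequence emitted above.  */
  double lo = (double) (unsigned int) (x & 0xffffffffu);
  double hi = (double) (unsigned int) (x >> 32) * 4294967296.0; /* 2^32 */
  return hi + lo;
}
#endif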
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
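/* The same bias-and-correct idea in scalar C (an illustrative sketch; the
   name is hypothetical): wrap the input into signed range, convert, then
   add 2^31 back in floating point, where it is exact.  */
#if 0
static double
uns32_to_double_sketch (unsigned int u)
{
  int biased = (int) (u + 0x80000000u);   /* u - 2^31, wrapped to signed */
  return (double) biased + 2147483648.0;  /* + 2^31 restores the value */
}
#endif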
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
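/* The 16-bit split in scalar C (an illustrative sketch): each half is
   exactly representable in SFmode and the 2^16 scaling is exact, so the
   only rounding happens at the final add.  */
#if 0
static float
uns32_to_float_sketch (unsigned int u)
{
  return (float) (u >> 16) * 65536.0f + (float) (u & 0xffff);
}
#endif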
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}
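/* Per element, the adjustment plus the caller's fix_trunc and xor compute
   the following (an illustrative scalar sketch with a hypothetical name;
   the xor with 0x80000000 acts as an OR here, since the reduced value has
   bit 31 clear):  */
#if 0
static int
ufix_via_sfix_sketch (float f)
{
  if (f >= 2147483648.0f)   /* too large for a signed conversion */
    return (int) ((unsigned int) (int) (f - 2147483648.0f) ^ 0x80000000u);
  return (int) f;           /* plain signed conversion suffices */
}
#endif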
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      break;
    }

  gcc_unreachable ();
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
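/* Example of the masks produced (an illustrative sketch, not GCC code):
   for SFmode the NEG mask is 0x80000000 and the INVERT (ABS) mask is
   0x7fffffff, so the SSE lowering of abs reduces to a single AND:  */
#if 0
static float
fabs_sketch (float x)
{
  union { float f; unsigned int i; } u;
  u.f = x;
  u.i &= 0x7fffffffu;   /* invert == true: everything but the sign bit */
  return u.f;
}
#endif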
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
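/* The mask algebra shared by both copysign splitters, restated in scalar C
   (an illustrative sketch; GCC emits the equivalent ANDPS/ANDNPS/ORPS):  */
#if 0
static double
copysign_sketch (double mag, double sgn)
{
  union { double d; unsigned long long i; } m, s;
  m.d = mag;
  s.d = sgn;
  m.i &= ~0x8000000000000000ull;  /* nmask: clear sign of magnitude */
  s.i &=  0x8000000000000000ull;  /* mask: isolate sign of sign operand */
  m.i |= s.i;
  return m.d;
}
#endif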
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case LTU:                   /* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
    case GTU:                   /* CF=0 & ZF=0 */
    case LEU:                   /* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCAmode:
        case CCCmode:
        case CCOmode:
        case CCSmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:                    /* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:                  /* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              rtx tmp;
              tmp = op0, op0 = op1, op1 = tmp;
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
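/* The equality shortcut above, restated in C (illustrative): only one
   branch is needed for EQ/NE on a double-word compare, since
   a == b  <=>  ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0.  */
#if 0
static int
di_eq_sketch (unsigned int lo0, unsigned int hi0,
              unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}
#endif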
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic that is not
         too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
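/* The rewrites above put comparisons into carry-flag (LTU/GEU) form so a
   later sbb can materialize the result without a branch.  Illustrative
   sketch of the a == 0 case, which becomes (unsigned) a < 1:  */
#if 0
static int
eq_zero_mask_sketch (unsigned int a)
{
  /* -1 when a == 0, else 0; maps to "cmpl $1, %eax; sbbl %edx, %edx".  */
  return -(a < 1);
}
#endif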
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               */
              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1      (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
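/* The sbb-based paths above compute a branchless select.  Restated in C
   (an illustrative sketch of the general two-constant case; the real code
   also special-cases diff == 1, cf == -1, and so on):  */
#if 0
static int
movcc_sketch (unsigned int a, unsigned int b, int ct, int cf)
{
  int mask = -(a < b);             /* cmp; sbb: -1 if taken, else 0 */
  return (mask & (ct - cf)) + cf;  /* andl (ct-cf); addl cf */
}
#endif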
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true, op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
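/* Illustrative sketch (not part of the original file, kept out of the
   build with #if 0): the AND/NOT-AND/IOR fallback above is the standard
   branchless select identity.  Assuming CMP is a full 0/-1 comparison
   mask, the scalar equivalent is:  */
#if 0
#include <stdint.h>

/* Select between T and F per-bit using MASK, where MASK is all-ones when
   the condition holds and all-zeros otherwise.  */
static uint32_t
bitwise_select (uint32_t mask, uint32_t t, uint32_t f)
{
  /* t2 = cmp & op_true;  t3 = ~cmp & op_false;  dest = t3 | t2.  */
  return (mask & t) | (~mask & f);
}
#endif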
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
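/* Illustrative sketch (not part of the original file): scalar versions of
   the two tricks used above.  "x < 0 ? -1 : 0" is just a right shift by
   the sign-bit position (arithmetic for -1, logical for 1), and unsigned
   GTU becomes signed GT after biasing both operands by INT_MIN.  */
#if 0
#include <stdint.h>

/* Assumes arithmetic right shift of signed values, as GCC guarantees.  */
static int32_t lt_zero_all_ones (int32_t x) { return x >> 31; }   /* -1 : 0 */
static uint32_t lt_zero_one (uint32_t x)    { return x >> 31; }   /*  1 : 0 */

static int
gtu_via_signed (uint32_t a, uint32_t b)
{
  /* Subtracting 0x80000000 (i.e. flipping the sign bit) preserves the
     unsigned order while making a plain signed compare correct.  */
  return (int32_t) (a - 0x80000000u) > (int32_t) (b - 0x80000000u);
}
#endif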
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can, after preparing suitable
             masks, use vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
               t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_const_mem (maskmode, vt);
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_lowpart (mode, target);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8si (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V8SFmode:
          mask = gen_lowpart (V8SFmode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SFmode);
          mask = gen_lowpart (V4SFmode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (128);
          for (i = 0; i < 32; i++)
            vec[i] = vt2;
          vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1, t3));
              return;
            }
          else
            {
              t4 = gen_reg_rtx (V32QImode);
              /* Similarly to the above one_operand_shuffle code,
                 just repeated twice for each operand.  The merge_two:
                 code will merge the two results together.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
                                              gen_lowpart (V4DImode, t4),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              emit_insn (gen_iorv32qi3 (t4, t2, t4));
              emit_insn (gen_iorv32qi3 (t3, t1, t3));
              t1 = t4;
              t2 = t3;
              goto merge_two;
            }

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    emit_insn (gen_xop_pperm (target, op0, op1, mask));
  else if (one_operand_shuffle)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask.  At which point the masking that expand_int_vcond
             will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      for (i = 0; i < w; i++)
        vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
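/* Illustrative sketch (not part of the original file): the essence of the
   two-operand V16QI path above.  Each input is shuffled independently and
   the results are merged by checking whether the control byte selected an
   element of the second vector (index >= 16).  */
#if 0
#include <stdint.h>

static void
shuffle_two (uint8_t *dst, const uint8_t *op0, const uint8_t *op1,
             const uint8_t *sel)
{
  int i;
  for (i = 0; i < 16; i++)
    {
      unsigned idx = sel[i] & 31;	/* mask = mask & {2*w-1, ...} */
      dst[i] = idx < 16 ? op0[idx] : op1[idx - 16];
    }
}
#endif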
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp, dest;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
        {
        case V32QImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv16qiv16hi2;
          else
            unpack = gen_avx2_sign_extendv16qiv16hi2;
          halfmode = V16QImode;
          extract
            = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
          break;
        case V16HImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv8hiv8si2;
          else
            unpack = gen_avx2_sign_extendv8hiv8si2;
          halfmode = V8HImode;
          extract
            = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
          break;
        case V8SImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv4siv4di2;
          else
            unpack = gen_avx2_sign_extendv4siv4di2;
          halfmode = V4SImode;
          extract
            = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
          break;
        case V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (GET_MODE_SIZE (imode) == 32)
        {
          tmp = gen_reg_rtx (halfmode);
          emit_insn (extract (tmp, operands[1]));
        }
      else if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (imode);
          emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
                                         gen_lowpart (V1TImode, operands[1]),
                                         GEN_INT (64)));
        }
      else
        tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
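/* Illustrative sketch (not part of the original file): the pre-SSE4.1 path
   above widens elements by interleaving each one with a companion value
   that is zero for zero extension and an all-ones (0 > x) comparison mask
   for sign extension, so the top half of each widened lane carries the
   sign.  Scalar equivalent for one 16-to-32-bit element:  */
#if 0
#include <stdint.h>

static int32_t
widen_element (int16_t x, int unsigned_p)
{
  uint16_t hi = unsigned_p ? 0 : (uint16_t) (0 > x ? -1 : 0);
  return (int32_t) (((uint32_t) hi << 16) | (uint16_t) x);
}
#endif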
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode:
          insn = gen_subqi3_carry;
          break;
        case HImode:
          insn = gen_subhi3_carry;
          break;
        case SImode:
          insn = gen_subsi3_carry;
          break;
        case DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode:
          insn = gen_addqi3_carry;
          break;
        case HImode:
          insn = gen_addhi3_carry;
          break;
        case SImode:
          insn = gen_addsi3_carry;
          break;
        case DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
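/* Illustrative sketch (not part of the original file): what the adc/sbb
   sequence above computes.  The compare sets the carry flag, and the
   add/subtract-with-carry folds the condition into the increment:  */
#if 0
#include <stdint.h>

static uint32_t
cond_increment (uint32_t x, uint32_t a, uint32_t b)
{
  /* With an LTU compare, carry = (a < b); "adc x, 0" then yields
     x + (a < b) with no branch and no cmove.  */
  return x + (a < b);
}
#endif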
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
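/* Illustrative sketch (not part of the original file): the register and
   offsettable-memory cases above just step through SImode-sized pieces;
   for a constant the same split looks like this:  */
#if 0
#include <stdint.h>

static void
split_di_constant (uint64_t v, uint32_t parts[2])
{
  parts[0] = (uint32_t) v;		/* low word goes in parts[0] */
  parts[1] = (uint32_t) (v >> 32);	/* high word goes in parts[1] */
}
#endif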
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], word_mode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts[1] || collisionparts[2]))
        {
          if (collisionparts[1])
            {
              tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
              tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
            }
          else
            {
              tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
              tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (ix86_gen_add3 (stack_pointer_rtx,
                                          stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this
             is a register, it is OK - we will just use the larger
             counterpart.  We also retype memory - these come from an attempt
             to avoid a REX prefix on moving the second half of a TFmode
             value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);

  return;
}
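/* Illustrative sketch (not part of the original file): why the copy order
   chosen above matters.  If a destination register also appears in the
   source address, copying low-to-high clobbers the address before the
   later parts are loaded; reversing the order avoids the overlap.  */
#if 0
#include <stdint.h>

static void
copy_parts_reversed (uint32_t *dst, const uint32_t *src, int nparts)
{
  int i;
  /* Copy the highest part first, mirroring the reversed operand order
     used when part[0][0] overlaps the source.  */
  for (i = nparts - 1; i >= 0; i--)
    dst[i] = src[i];
}
#endif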
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
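/* Illustrative sketch (not part of the original file): the add-chain
   alternative used above.  Each self-add doubles the operand, so COUNT
   additions equal one shift by COUNT when the adds come out cheaper:  */
#if 0
#include <stdint.h>

static uint32_t
shl_by_adds (uint32_t x, int count)
{
  while (count-- > 0)
    x += x;			/* one "x << 1" per iteration */
  return x;
}
#endif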
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
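/* Illustrative sketch (not part of the original file): the shld-based
   double-word left shift emitted above, for a count below the half-word
   width (the shift_adj insns fix up counts >= half_width at run time):  */
#if 0
#include <stdint.h>

static void
dw_shl (uint32_t *hi, uint32_t *lo, unsigned count)	/* 0 < count < 32 */
{
  *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
  *lo <<= count;
}
#endif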
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}
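/* Illustrative sketch (not part of the original file): the shrd-based
   double-word right shifts emitted by the two functions above, again for
   counts below the half-word width:  */
#if 0
#include <stdint.h>

static void
dw_lshr (uint32_t *hi, uint32_t *lo, unsigned count)	/* 0 < count < 32 */
{
  *lo = (*lo >> count) | (*hi << (32 - count));	/* shrd */
  *hi >>= count;				/* logical: zero fill */
}

static void
dw_ashr (int32_t *hi, uint32_t *lo, unsigned count)	/* 0 < count < 32 */
{
  *lo = (*lo >> count) | ((uint32_t) *hi << (32 - count));
  *hi >>= count;				/* arithmetic: sign fill */
}
#endif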
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
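/* Illustrative sketch (not part of the original file): SCALE is a chunk
   size and thus always a power of two here, so the division reduces to a
   logical shift by log2 (scale):  */
#if 0
static unsigned long
scale_counter_equiv (unsigned long count, int scale)
{
  return count >> __builtin_ctz (scale);	/* count / scale */
}
#endif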
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed to
   by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output
   the equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of the chunk size moved at
   once.  SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing
   info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode,
                                    GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode,
                                    GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
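/* Illustrative sketch (not part of the original file): the shape of the
   loop emitted above, for the memset flavour with a 4x unroll.  The byte
   count is first rounded down to a whole number of unrolled chunks; the
   remainder is left to the epilogue code.  */
#if 0
#include <stddef.h>

static void
set_via_loop (unsigned char *dest, unsigned char value, size_t count)
{
  size_t piece = 4;			/* GET_MODE_SIZE (QImode) * unroll */
  size_t size = count & ~(piece - 1);	/* size = count & piece_size_mask */
  size_t iter;
  for (iter = 0; iter < size; iter += piece)
    {
      dest[iter + 0] = value;
      dest[iter + 1] = value;
      dest[iter + 2] = value;
      dest[iter + 3] = value;
    }
}
#endif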
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
                           offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
                           offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
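/* For illustration: a constant COUNT of 29 (binary 11101) with max_size 32
   makes the constant branch above emit one 16-byte, one 8-byte, one 4-byte
   and one 1-byte store, at offsets 0, 16, 24 and 28 respectively.  */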
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
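/* For illustration: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the code
   above emits three conditional copies (1, 2, then 4 bytes), each guarded
   by an ix86_expand_aligntest of the corresponding low bit of DESTPTR.  */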
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
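/* For illustration: ALIGN_BYTES == 7 makes the function emit a 1-byte, a
   2-byte and a 4-byte copy (bits 0, 1 and 2 of 7), after which DST and SRC
   have been advanced by 7 bytes and their known MEM sizes shrunk
   accordingly.  */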
/* Store enough of VALUE to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough of VALUE to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
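/* For illustration: if the active cost table were, say,
   {{256, unrolled_loop}, {-1, libcall}} and the size is unknown but we are
   forced to inline, the code above finds max == 256, recurses with
   expected_size == 128 (picking unrolled_loop), and with
   -minline-stringops-dynamically also arranges a run-time check so blocks
   larger than 256 bytes still go to the library call.  */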
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
      case no_stringop:
	gcc_unreachable ();
      case loop:
      case unrolled_loop:
	desired_align = GET_MODE_SIZE (Pmode);
	break;
      case rep_prefix_8_byte:
	desired_align = 8;
	break;
      case rep_prefix_4_byte:
	/* PentiumPro has special logic triggering for 8 byte aligned blocks,
	   copying whole cacheline at once.  */
	if (TARGET_PENTIUMPRO)
	  desired_align = 8;
	else
	  desired_align = 4;
	break;
      case rep_prefix_1_byte:
	/* PentiumPro has special logic triggering for 8 byte aligned blocks,
	   copying whole cacheline at once.  */
	if (TARGET_PENTIUMPRO)
	  desired_align = 8;
	else
	  desired_align = 1;
	break;
      case loop_1_byte:
	desired_align = 1;
	break;
      case libcall:
	return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
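/* For illustration: smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8; the result is always strictly
   greater than VAL, which the epilogue sizing below relies on.  */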
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */
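/* For illustration, a hypothetical expansion of a rep_prefix_4_byte copy
   with unknown count and ALIGN == 1 looks roughly like

	cmpl	$4, %ecx	   # 1) prologue guard: small blocks
	jb	.Lepilogue	   #	go straight to the epilogue
	...			   # 2) alignment prologue (aligntest copies)
	shrl	$2, %ecx	   # 3) main body
	rep movsl
   .Lepilogue:
	...			   # 4) epilogue: copy count & 3 tail bytes

   (register choice and the exact tests vary with the algorithm chosen).  */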
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
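/* For illustration: VAL == 0xAB is widened by the shift/or sequence below as
   0xAB -> (0xAB << 8) | 0xAB == 0xABAB -> (0xABAB << 16) | 0xABAB
   == 0xABABABAB, with one more 32-bit shift/or step for DImode.  */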
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
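/* For illustration: with size_needed == 8 on a 64-bit target VAL is
   promoted to DImode; with size_needed == 2 and no extra alignment work
   only the HImode form is needed.  */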
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if it is aligned to 4 bytes.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It only makes programs bigger and does not help
     to speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */
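  /* For illustration: scratch == 0x00414243 (a zero in the top byte) gives
     (scratch - 0x01010101) & ~scratch == 0xff000100, and masking with
     0x80808080 leaves 0x80000000, i.e. nonzero; with no zero byte present
     every 0x80 bit is cleared and the mask yields 0.  */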
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
/* Expand strlen.  */
bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
	return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
	XMM6_REG, XMM7_REG, XMM8_REG,
	XMM9_REG, XMM10_REG, XMM11_REG,
	XMM12_REG, XMM13_REG, XMM14_REG,
	XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
	fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
				       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[vec_len++]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
	{
	  if (cfun->machine->callee_return_avx256_p)
	    avx256 = callee_return_pass_avx256;
	  else
	    avx256 = callee_pass_avx256;
	}
      else if (cfun->machine->callee_return_avx256_p)
	avx256 = callee_return_avx256;
      else
	avx256 = call_no_avx256;

      if (reload_completed)
	emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
	vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
					 gen_rtvec (1, GEN_INT (avx256)),
					 UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */
const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */
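  /* For illustration: (%esp) must be encoded as (%esp,1) with a SIB byte,
     and (%ebp) as 0(%ebp) with a one-byte displacement, so each costs one
     extra byte beyond the bare modrm; r12 and r13 inherit the same quirks
     in 64-bit mode because their low 3 modrm bits match esp and ebp.  */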
  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 mode.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
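/* For illustration: with SHORTFORM set, "addl $100, %eax" fits the imm8
   alternative (-128..127) and the attribute is 1, while "addl $300, %eax"
   needs the full imm32 form and the attribute is 4.  */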
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
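
/* A typical pair recognized by ix86_flags_dependent is "cmp" followed by
   a conditional jump or setcc that reads only the flags the compare set;
   the Pentium model in ix86_adjust_cost then treats that dependence as
   free, modelling compare/branch pairing.  */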
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
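
/* Example of the address generation interlock this detects:
       addl $4, %eax
       movl (%eax), %edx
   The load's address generation must wait for the add's result, which
   costs an extra cycle on the in-order Pentium (see ix86_adjust_cost).  */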
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle of extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Model the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 that instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Model the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 that instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 that instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost;

	  /* Because of the difference between the length of the integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
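
/* Worked example for the AMD/generic case above: when INSN is an integer
   load whose address does not depend on DEP_INSN, the edge cost is
   reduced by loadcost (3), modelling the out-of-order core overlapping
   the address-independent load with its producer.  */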
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as the number of instructions that can be executed in one cycle,
	 i.e., issue_rate.  I wonder why tuning for many CPUs does not do
	 this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
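
/* Hypothetical worked example of this model: with a 16-byte ifetch block,
   ready insns of sizes 3+4+2+3+4 bytes fit (16 bytes, 5 insns), but a
   further 2-byte insn would exceed the block and is masked out until the
   next cycle; the sizes here are illustrative only.  */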
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}
static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
  (const_ix86_first_cycle_multipass_data_t data,
   char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in the current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on the current cycle due to decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
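
/* Note: a backtrack only clears the ready_try bits that the corresponding
   issue step recorded in ready_try_change, so insns masked before this
   round stay masked.  */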
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
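
/* The scheduler only invokes these hooks when they are non-NULL, so
   clearing them for other CPUs keeps the default single-pass behaviour
   without per-insn overhead.  */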
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
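
/* For example, a 'double' constant in memory is raised here to 64-bit
   alignment even on ia32, where its default alignment is only 32 bits,
   so 8-byte loads of it are naturally aligned.  */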
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
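
/* E.g. "static double d;" gets 64-bit alignment from the REAL_TYPE rule,
   and on x86-64 "static char buf[16];" is raised to 128-bit alignment by
   the aggregate rule above, enabling aligned SSE access.  */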
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local
     or global array variable of length at least 16 bytes or a C99
     variable-length array variable always has alignment of at least
     16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.
     This rule is meant for static storage (where the compiler cannot do
     the analysis by itself).  We follow it for automatic variables only
     when convenient.  We fully control everything in the function being
     compiled, and functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
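
/* Summary of the cases above: with the default ABI the static chain
   arrives in %ecx (32-bit) or %r10 (64-bit), fastcall/thiscall functions
   receive it in %eax, and only regparm(3) functions fall back to a stack
   slot via the trampoline.  */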
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);

	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
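
/* For reference, the 64-bit trampoline emitted above (movabs variant) is:
       49 bb <8-byte fnaddr>    movabs $fnaddr, %r11
       49 ba <8-byte chain>     movabs $chain,  %r10
       49 ff e3                 jmp    *%r11
       90                       nop (write padding)  */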
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
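
/* Lazy construction matters here: the table covers every vector and
   pointer type mentioned in i386-builtin-types.def, most of which a
   given translation unit never uses, so each type is built on first
   request and memoized in ix86_builtin_type_tab.  */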
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
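
/* Function types with codes past IX86_BT_LAST_FUNC are aliases: they
   reuse the signature of an earlier entry via
   ix86_builtin_func_alias_base and are resolved recursively above.  */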
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PANDN,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,

  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,

  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features.  */
25841 IX86_BUILTIN_CPU_INIT
,
25842 IX86_BUILTIN_CPU_IS
,
25843 IX86_BUILTIN_CPU_SUPPORTS
,
25848 /* Table for the ix86 builtin decls. */
25849 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
             enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
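
/* Usage sketch (illustrative only, not a registration that appears at
   this point in the file): an SSE builtin would typically be entered
   through the machinery above as

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                  V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   If SSE is already enabled (or the front end registers builtins in
   extension scope), the decl is built immediately; otherwise only the
   mask, name and type code are recorded in ix86_builtins_isa, and the
   decl is materialized later by ix86_add_new_builtins below.  */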

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
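
/* Illustrative flow (a sketch, not code from this file): when
   function-specific target options turn on additional ISA bits, the
   deferred builtins for those bits can be materialized with

     ix86_add_new_builtins (ix86_isa_flags);

   Any entry whose recorded isa mask intersects the new flags and which
   is still marked set_and_not_built_p gets its decl created, in
   extension scope, at that point.  */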

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
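
/* Example of the operand-swap convention (taken from the bdesc_args
   table below): SSE has no native "compare greater" variant of cmpps,
   so __builtin_ia32_cmpgtps is described as an LT comparison whose
   operands are swapped when the builtin is expanded:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3,
       "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS,
       LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },

   i.e. a > b is emitted as b < a.  */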

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
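
/* For reference (an illustration assuming the usual intrinsic headers,
   not something defined here): the string-compare builtins above back
   the SSE4.2 intrinsics, e.g. smmintrin.h wraps them roughly as

     #define _mm_cmpistri(X, Y, M) \
       ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \
                                           (__v16qi)(__m128i)(Y), (int)(M)))

   The CC*mode value in the flag field of the *a/*c/*o/*s/*z entries
   selects which condition-code bit the variant reads.  */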

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  /* LWP */
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
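
/* Sketch of how a table like the one above is consumed (assumed shape,
   using only interfaces visible in this file; the actual registration
   loops appear later):

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       if (d->name)
         def_builtin (d->mask, d->name,
                      (enum ix86_builtin_func_type) d->flag, d->code);

   i.e. the flag field carries the prototype code and the mask field
   gates registration on the enabled ISAs.  */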

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26455 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26456 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26458 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26459 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26460 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26461 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26463 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26464 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26465 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26466 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26467 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26468 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26469 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26470 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26472 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26473 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26474 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26476 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26477 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
26479 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
26480 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26482 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
26484 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
26485 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
26486 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
26487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
26489 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26490 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26491 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26492 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26495 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26497 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26501 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26502 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26507 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26510 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
26511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26514 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
26516 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
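  /* SSE2 MMX */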
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
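  /* SSE3 */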
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
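  /* SSSE3 */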
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
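  /* SSE4.1 */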
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
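  /* SSE4.2 */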
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
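  /* SSE4A */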
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
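  /* AES */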
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
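  /* PCLMUL */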
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
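  /* AVX */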
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
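  /* AVX2 */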
26808 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
26809 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
26810 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
26811 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
26812 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26813 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26814 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26815 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26816 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26817 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26818 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26819 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26820 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26821 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26822 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26823 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26824 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
26825 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26826 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26827 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26828 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26829 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
26830 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
26831 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26832 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26833 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26834 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26835 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26836 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26837 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26838 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26839 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
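  /* Each entry above turns into one user-callable builtin whose C
     prototype is decoded from the ix86_builtin_func_type code.  A
     minimal user-level sketch (assuming -mavx2 and GCC's __v16hi
     vector typedef, which is not part of this table):

	__v16hi a, b;
	__v16hi hi = __builtin_ia32_pmulhw256 (a, b);

     i.e. V16HI_FTYPE_V16HI_V16HI reads "returns V16HI, takes two
     V16HI operands".  */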
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
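/* A worked example for the BMI2 bit-deposit entries above (user level,
   assuming -mbmi2): __builtin_ia32_pdep_si scatters the low bits of its
   first operand to the set-bit positions of its mask operand, so

     __builtin_ia32_pdep_si (0x5, 0x1a) == 0x12

   since 0b101 deposited into mask 0b11010 yields 0b10010;
   __builtin_ia32_pext_si performs the inverse gather.  */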
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
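/* These MULTI_ARG_* names are plain aliases for ix86_builtin_func_type
   codes, kept short so the table below stays readable; e.g.
   MULTI_ARG_3_SF is just V4SF_FTYPE_V4SF_V4SF_V4SF, the generic
   three-operand single-float signature shared by the FMA4 scalar and
   XOP conditional-move entries.  */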
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
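/* A user-level sketch of how the XOP comparison entries above are keyed
   (assuming -mxop): one insn pattern per mode plus the rtx_code stored
   in the table selects the condition, so

     __v16qi lt = __builtin_ia32_vpcomltub (a, b);

   expands via LTU.  The "neq" spellings are deliberate aliases: both
   __builtin_ia32_vpcomneb and __builtin_ia32_vpcomneqb map to
   IX86_BUILTIN_VPCOMNEB with code NE.  */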
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
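/* CODE_FOR_nothing above is deliberate: the TM builtins are never
   expanded from an insn pattern; a call simply ends up targeting the
   matching libitm entry point (_ITM_WM64 and friends), so only the
   masks and the function signatures in this table matter.  */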
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}
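/* A minimal sketch of the mapping this implements, assuming a 256-bit
   vector type, i.e. TYPE_SIZE of 256:

     tree fn = ix86_builtin_tm_load (type);
     gcc_assert (fn == builtin_decl_explicit (BUILT_IN_TM_LOAD_M256));

   Non-vector types, and vector sizes other than 64/128/256, fall
   through and yield NULL_TREE.  */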
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
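/* Note the two name arguments passed to add_builtin_function above:
   d->name is the user-visible spelling ("__builtin__ITM_WM128", say),
   while d->name + strlen ("__builtin_") supplies the assembler-level
   name the call resolves to ("_ITM_WM128" in libitm).  */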
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
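  /* The split between the two loops above follows the builtins'
     semantics: bdesc_special_args entries may touch memory and go
     through def_builtin, while bdesc_args entries are pure value
     computations, so def_builtin_const can mark the resulting decls
     const (TREE_READONLY) and let later passes CSE calls to them.  */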
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
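  /* User-level sketch for the RDRND step builtins just registered,
     assuming -mrdrnd: each returns nonzero on success and stores the
     random value through its pointer argument, so callers retry:

	unsigned int r;
	while (!__builtin_ia32_rdrand32_step (&r))
	  ;  */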
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
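  /* All the gather signatures above share one shape; decoded, e.g.
     V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT is roughly

	v2df gather (v2df src, const double *base, v4si index,
		     v2df mask, int scale);

     i.e. merge source, base pointer, index vector, write mask and a
     literal element scale, matching the vgather insn operands.  */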
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);
27609 /* Access to the vec_extract patterns. */
27610 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
27611 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
27612 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
27613 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
27614 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
27615 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
27616 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
27617 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
27618 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
27619 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
27621 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27622 "__builtin_ia32_vec_ext_v4hi",
27623 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
27625 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
27626 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
27628 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
27629 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
27631 /* Access to the vec_set patterns. */
27632 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
27633 "__builtin_ia32_vec_set_v2di",
27634 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
27636 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
27637 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
27639 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
27640 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
27642 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
27643 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
27645 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27646 "__builtin_ia32_vec_set_v4hi",
27647 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
27649 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
27650 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
  /* Add FMA4 multi-arg argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
			      "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
			  get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
	DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
		      get_identifier (field_name[3]),
		      build_array_type (unsigned_type_node,
					build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
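
/* For reference, the record built above is meant to match this layout
   on the libgcc side (a sketch derived from the field names and the
   comments above; the authoritative declaration lives in
   libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;
*/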
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
			 VAR_DECL,
			 get_identifier (name),
			 type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
				DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c  */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_CPU_MAX
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov", F_CMOV},
      {"mmx", F_MMX},
      {"popcnt", F_POPCNT},
      {"sse", F_SSE},
      {"sse2", F_SSE2},
      {"sse3", F_SSE3},
      {"ssse3", F_SSSE3},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx", F_AVX},
      {"avx2", F_AVX2}
    };

  static tree __processor_model_type = NULL_TREE;
  static tree __cpu_model_var = NULL_TREE;

  if (__processor_model_type == NULL_TREE)
    __processor_model_type = build_processor_model_struct ();

  if (__cpu_model_var == NULL_TREE)
    __cpu_model_var = make_var_decl (__processor_model_type,
				     "__cpu_model");

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
	 && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRS leading to a
	 STRING_CST.  */
      if (!EXPR_P (param_string_cst))
	{
	  error ("Parameter to builtin must be a string constant or literal");
	  return integer_zero_node;
	}
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
	if (strcmp (arch_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ARCH_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
	  && field_val < M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (field);
	  field_val -= M_CPU_TYPE_START;
	}

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (DECL_CHAIN (field));
	  field_val -= M_CPU_SUBTYPE_START;
	}

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Check the value.  */
      return build2 (EQ_EXPR, unsigned_type_node, ref,
		     build_int_cstu (unsigned_type_node, field_val));
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree field;
      tree array_elt;
      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
	if (strcmp (isa_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ISA_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
	field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
			  integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
		     build_int_cstu (unsigned_type_node, field_val));
    }
  gcc_unreachable ();
}
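
/* Illustrative example of the folding above (a sketch, not compiler
   code): given the enums and the struct layout defined in
   fold_builtin_cpu, a source-level call such as

     __builtin_cpu_is ("westmere")

   folds to a comparison against the subtype field, conceptually

     __cpu_model.__cpu_subtype == (M_INTEL_COREI7_WESTMERE
				   - M_CPU_SUBTYPE_START)

   while __builtin_cpu_supports ("sse4.2") folds to

     __cpu_model.__cpu_features[0] & (1 << F_SSE4_2)

   which user code typically tests for a non-zero result.  */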
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
	{
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);
	}
    }

  return NULL_TREE;
}
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}
/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
			 INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
			 INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
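
/* Typical user-level use of the three builtins created above (an
   illustrative sketch of runtime dispatch, not part of this file):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("sse4.2"))
	 return 2;
       if (__builtin_cpu_is ("amdfam10h"))
	 return 1;
       return 0;
     }
*/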
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
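
/* Illustrative user-level use of the types registered above (a sketch;
   assumes the usual GCC constant suffixes for these extended types):

     __float80  x = 1.5w;
     __float128 y = 1.5q;

   The __float128 operations are also reachable through the TFmode
   builtins defined in ix86_init_builtins below.  */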
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
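
/* Illustrative flow through the helper above (a sketch, not compiler
   code): for a two-operand builtin such as __builtin_ia32_paddw128,
   d->icode names the vector add pattern, so a user-level call like

     __m128i r = _mm_add_epi16 (a, b);

   (assuming the usual intrinsic-to-builtin mapping in emmintrin.h)
   arrives here with EXP describing the two vector arguments, and the
   emitted insn is simply GEN_FCN (icode) (target, op0, op1).  */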
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
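
/* Worked example of the XOP rotate handling above (illustrative): a
   constant rotate count is masked to the element width.  For a V8HI
   rotate the inner mode is HImode, so

     mask = GET_MODE_BITSIZE (HImode) - 1 = 15;
     op = GEN_INT (17 & 15);   -- a count of 17 rotates by 1

   after which the plain rotl pattern can be used in place of the XOP
   variable-rotate pattern.  */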
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
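
/* Illustrative note on the SWAP path above (a sketch): some comparison
   builtins are implemented with an insn that only exists with the
   operands the other way around, so a greater-than builtin can be
   expanded as the reversed less-than, conceptually

     op2 = gen_rtx_fmt_ee (LT, mode0, b, a);   -- a > b  becomes  b < a

   which is why the descriptors for those builtins request the swap.  */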
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
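
/* Example of the range check above (illustrative): for a V4SF vector
   type TYPE_VECTOR_SUBPARTS is 4, so max is 3 and

     __builtin_ia32_vec_ext_v4sf (x, 5)

   is rejected with the error above, while selectors 0..3 are returned
   unchanged.  */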
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
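
/* Illustrative use of the vec_set wrappers (a sketch, assuming the
   usual header mapping): emmintrin.h implements

     __m128i _mm_insert_epi16 (__m128i a, int v, int n)

   roughly as

     (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) a, v, n);

   which funnels into ix86_expand_vec_set_builtin above; note the
   result is a fresh register, so A itself is never modified.  */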
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
        /* Make it call __cpu_indicator_init in libgcc.  */
        tree call_expr, fndecl, type;
        type = build_function_type_list (integer_type_node, NULL_TREE);
        fndecl = build_fn_decl ("__cpu_indicator_init", type);
        call_expr = build_call_expr (fndecl, 0);
        return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
        gcc_assert (fold_expr != NULL_TREE);
        return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    }
  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
                                       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
      return const0_rtx;
    }
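
/* Example of the check above (illustrative): calling _mm256_add_pd from a
   function compiled without -mavx triggers the "needs isa option" error,
   whereas marking the caller with

     __attribute__ ((target ("avx")))

   satisfies the per-function ISA test and the builtin expands normally.  */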
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
        op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
          || !insn_data[icode].operand[0].predicate (target, Pmode))
        target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
               ? CODE_FOR_tbm_bextri_si
               : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
        {
          error ("last argument must be an immediate");
          return const0_rtx;
        }
      else
        {
          unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
          unsigned char lsb_index = INTVAL (op1) & 0xFF;
          op1 = GEN_INT (length);
          op2 = GEN_INT (lsb_index);
          pat = GEN_FCN (icode) (target, op0, op1, op2);
          if (pat)
            emit_insn (pat);
          return target;
        }
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
        {
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_zero_extendhisi2 (op2, op0));
        }
      else if (mode0 == SImode)
        op2 = op0;
      else
        op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
        target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
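
    /* User-level shape of the protocol implemented above (illustrative
       sketch):

         unsigned int
         get_random (void)
         {
           unsigned int r;
           while (!_rdrand32_step (&r))
             ;
           return r;
         }

       The builtin stores the hardware value through the pointer argument
       and the conditional move materializes the carry flag as the 0/1
       return value tested by the loop.  */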
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
          || GET_MODE (target) != insn_data[icode].operand[0].mode)
        subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
        subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
          || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
        {
          rtx half = gen_reg_rtx (V4SImode);
          if (!nonimmediate_operand (op2, V8SImode))
            op2 = copy_to_mode_reg (V8SImode, op2);
          emit_insn (gen_vec_extract_lo_v8si (half, op2));
          op2 = half;
        }
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
               || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
        {
          rtx (*gen) (rtx, rtx);
          rtx half = gen_reg_rtx (mode0);
          if (mode0 == V4SFmode)
            gen = gen_vec_extract_lo_v8sf;
          else
            gen = gen_vec_extract_lo_v8si;
          if (!nonimmediate_operand (op0, GET_MODE (op0)))
            op0 = copy_to_mode_reg (GET_MODE (op0), op0);
          emit_insn (gen (half, op0));
          op0 = half;
          if (!nonimmediate_operand (op3, GET_MODE (op3)))
            op3 = copy_to_mode_reg (GET_MODE (op3), op3);
          emit_insn (gen (half, op3));
          op3 = half;
        }

      /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
      if (GET_MODE (op1) != Pmode)
        op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }
      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;
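
    /* Typical source-level use of the gather expansion above (illustrative
       sketch):

         __m256d
         gather4 (double const *base, __m128i idx)
         {
           return _mm256_i32gather_pd (base, idx, 8);
         }

       The scale argument must be a literal 1, 2, 4 or 8; anything else is
       rejected by the operand 5 predicate check above.  */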
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE)
            /* Emit a normal call if SSE isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;
    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
        }
      break;
    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;
    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;
    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;
    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;
    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;
    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;
    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;
    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
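
/* For example (illustrative), with -mavx -ffast-math a loop such as

     void
     f (double *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sqrt (a[i]);
     }

   makes the vectorizer query this hook with BUILT_IN_SQRT and a V4DF
   vector type, receiving the IX86_BUILTIN_SQRTPD256 decl so the loop
   body becomes a single vsqrtpd.  */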
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
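
/* Resulting names (illustrative): BUILT_IN_SIN with a 2-element double
   vector becomes "vmldSin2", BUILT_IN_SINF with a 4-element float vector
   becomes "vmlsSin4", and log/logf map to the special-cased
   "vmldLn2"/"vmlsLn4".  This handler is selected by -mveclibabi=svml.  */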
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
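
/* Resulting names (illustrative): BUILT_IN_SIN with a 2-element double
   vector becomes "__vrd2_sin" and BUILT_IN_SINF with a 4-element float
   vector becomes "__vrs4_sinf".  This handler is selected by
   -mveclibabi=acml.  */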
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
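
/* Example of the index rules above (illustrative): on x86_64, indexing

     double
     g (double const *p, unsigned int const *idx, int i)
     {
       return p[idx[i]];
     }

   is rejected by this hook, because the insn would sign-extend the
   32-bit unsigned index; int or 64-bit indices are accepted.  */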
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
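
/* Illustrative effect: under the unsafe-math conditions above, code like

     float f (float x) { return 1.0f / __builtin_sqrtf (x); }

   is expanded via IX86_BUILTIN_RSQRTF into rsqrtss plus a Newton-Raphson
   correction step instead of sqrtss followed by a divide.  */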
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
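
/* Worked example (illustrative): in V4SFmode the parallel [3 2 1 0]
   packs as mask = 3 + (2 << 2) + (1 << 4) + (0 << 6) = 0x1b, so the
   function returns 0x1c, the vpermilps immediate 0x1b plus one.  */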
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
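
/* Worked example (illustrative): in V4DFmode the parallel [2 3 0 1],
   i.e. a swap of the two 128-bit halves, has halves starting at elements
   2 and 0, giving digits 1 and 0 and mask 0x01, so the function returns
   0x02, the vperm2f128 immediate plus one.  */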
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */

void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
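
/* Typical pairing of the two helpers above (illustrative sketch):

     mem = ix86_force_to_memory (DImode, operand);
     ... use MEM as the memory operand of an insn ...
     ix86_free_from_memory (DImode);

   On red-zone targets the slot lives below the stack pointer and neither
   call adjusts the stack pointer at all.  */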
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}

/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */

static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
          || rclass == LEGACY_REGS
          || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
        regno = REGNO (x);
      else
        regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
        regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
        return true;

      default:
        break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                                enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
                         int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (Q_CLASS_P (regclass) || TARGET_64BIT)
          {
            if (!in)
              return ix86_cost->int_store[0];
            if (TARGET_PARTIAL_REG_DEPENDENCY
                && optimize_function_for_speed_p (cfun))
              cost = ix86_cost->movzbl_load;
            else
              cost = ix86_cost->int_load[0];
            if (in == 2)
              return MAX (cost, ix86_cost->int_store[0]);
            return cost;
          }
        else
          {
            if (in == 2)
              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
            if (in)
              return ix86_cost->movzbl_load;
            else
              return ix86_cost->int_store[0] + 4;
          }
        break;
      case 2:
        if (in == 2)
          return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
        return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
        /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
        if (mode == TFmode)
          mode = XFmode;
        if (in == 2)
          cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
        else if (in)
          cost = ix86_cost->int_load[2];
        else
          cost = ix86_cost->int_store[2];
        return (cost * (((int) GET_MODE_SIZE (mode)
                         + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
                       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
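
/* Example (illustrative): inline_memory_move_cost (QImode, GENERAL_REGS, 2)
   on a 32-bit target returns MAX (movzbl_load, int_store[0] + 4), the
   extra 4 modelling that byte stores are only directly available from
   the Q_REGS subset of the general registers.  */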
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE2 && mode == TFmode)
          || (TARGET_80387 && mode == XFmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE2 && mode == TCmode)
          || (TARGET_80387 && mode == XCmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
          || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
          || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
          || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
        units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
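
/* Example (illustrative): on a 32-bit target a DImode register-register
   set costs COSTS_N_INSNS (2), two word moves, while a V4SF move with
   SSE enabled widens UNITS to 16 and counts as a single insn.  */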
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	{
	  *total = 0;
	  return true;
	}
      switch (standard_80387_constant_p (x))
	{
	case 1: /* 0.0 */
	  *total = 1;
	  return true;
	default: /* Other constants */
	case 2: /* 1.0 */
	  *total = 2;
	  return true;
	case 0:
	case -1:
	  break;
	}
      /* Start with (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (flag_pic != 0 && !TARGET_64BIT)
		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else
	    *total = cost->shift_var;
	}
      break;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      break;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      break;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      break;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      break;
    }

  return false;
}
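/* Example of the LEA special-casing in the PLUS case above: for an
   address like (plus (mult reg 4) reg2), val == 4, so the whole
   computation is charged cost->lea plus the sub-operand costs --
   matching the single instruction "leal (%reg2,%reg,4), %dst" the
   backend would emit.  */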
#if TARGET_MACHO
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
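/* For a 32-bit target this yields an order beginning with the call-used
   general registers %eax, %ecx, %edx (cheap to use, no save/restore in
   the prologue), then the call-saved %ebx/%esi/%edi/%ebp, before any
   x87, SSE, or MMX registers.  */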
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
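/* Usage example: the layout choice can be forced per-type in source,
   e.g.

     struct __attribute__ ((ms_struct)) S { char c; int i : 8; };

   selects the MS bitfield layout even without -mms-bitfields, while
   gcc_struct forces the native GCC layout back on.  */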
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
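/* For instance, on 64-bit SysV targets "this" arrives in %rdi
   (parm_regs[0]); when the function returns an aggregate in memory, the
   hidden return-slot pointer occupies %rdi and pushes "this" to %rsi
   (parm_regs[1]) -- which is exactly what indexing parm_regs by AGGR
   encodes.  */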
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
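/* The effect is to replace a plain "ret" that is the target of a
   conditional jump (or immediately follows one) with the long return
   form emitted by gen_simple_return_internal_long, which K8-family
   branch predictors handle without the misprediction penalty.  */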
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
       return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
   unsigned int regno;
   if (!REG_P (*p))
     return 0;
   regno = REGNO (*p);
   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
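/* The negative path computes float ((u >> 1) | (u & 1)) * 2: halving
   brings the value into signed range, and OR-ing the shifted-out bit
   back into bit 0 keeps it as a sticky bit, so the signed conversion of
   the halved value followed by the doubling rounds exactly as a direct
   unsigned conversion would.  */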
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
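/* E.g. get_mode_wider_vector (V8QImode) yields V4HImode and
   get_mode_wider_vector (V16QImode) yields V8HImode: same byte size,
   half the element count, as the asserts above verify.  */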
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
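/* Example of the "widen" strategy above: to broadcast a QImode value b
   into V8QImode, first build the HImode value (b << 8) | b, then recurse
   to broadcast that into V4HImode -- each level of recursion doubles the
   element width until a mode with a native broadcast or shuffle is
   reached.  */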
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
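/* For instance, initializing a V4SImode vector to {1, 2, x, 4} loads the
   pool constant {1, 2, 0, 4} and then overwrites element 2 with x via
   ix86_expand_vector_set -- one memory load plus one insert instead of
   four scalar inserts.  */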
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
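/* The recursion bottoms out in pairwise concatenation: eight V8SFmode
   inputs become four V2SFmode pairs, the pairs are concatenated into two
   V4SFmode halves, and a final VEC_CONCAT produces the V8SFmode
   result.  */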
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops [i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops [n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals,
					   one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
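/* Typical use from the expanders: given a PARALLEL of element rtxes,
   e.g.

     rtx par = gen_rtx_PARALLEL (V4SImode, gen_rtvec (4, a, b, c, d));
     ix86_expand_vector_init (false, target, par);

   the routine tries, in order: constant-pool load, broadcast, the
   one-nonzero and one-variable shortcuts, and finally the general
   concat/interleave fallback.  */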
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
34456 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
34458 enum machine_mode mode
= GET_MODE (vec
);
34459 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34460 bool use_vec_extr
= false;
34473 use_vec_extr
= true;
34477 use_vec_extr
= TARGET_SSE4_1
;
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));

	  gcc_unreachable ();
      use_vec_extr = true;

      use_vec_extr = TARGET_SSE4_1;

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					GEN_INT (elt), GEN_INT (elt),
					GEN_INT (elt), GEN_INT (elt)));

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);

      use_vec_extr = TARGET_SSE2;

      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      use_vec_extr = TARGET_SSE4_1;
	  tmp = gen_reg_rtx (V4SFmode);
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);

	  tmp = gen_reg_rtx (V2DFmode);
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);

	  tmp = gen_reg_rtx (V16QImode);
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);

	  tmp = gen_reg_rtx (V8HImode);
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);

	  tmp = gen_reg_rtx (V4SImode);
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);

	  tmp = gen_reg_rtx (V2DImode);
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
      /* ??? Could extract the appropriate HImode element and shift.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

emit_reduc_half (rtx dest, rtx src, int i)
  switch (GET_MODE (src))
	tem = gen_sse_movhlps (dest, src, src);
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));

      tem = gen_vec_interleave_highv2df (dest, src, src);

	tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				  gen_lowpart (V1TImode, src),
				  GEN_INT (i / 2));

	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));

	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);

	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));

      gcc_unreachable ();
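
/* Illustrative sketch only (hypothetical helper, not compiler code):
   a scalar model of one halving step on V4SF with i == 64, i.e. copy
   bits i/2 .. i-1 of SRC into bits 0 .. i/2-1 of DEST, as movhlps
   does.  The upper lanes of DEST are don't-care.  */
static void ATTRIBUTE_UNUSED
reduc_half_model (float dest[4], const float src[4])
{
  dest[0] = src[2];
  dest[1] = src[3];
  dest[2] = src[2];	/* undefined in the real sequence; any value is ok */
  dest[3] = src[3];
}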
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
      && mode == V8HImode
      && fn == gen_uminv8hi3)
      emit_insn (gen_sse4_1_phminposuw (dest, in));

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
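
/* Illustrative sketch only: the loop above performs a log2(nelt)-step
   tree reduction, halving the live width each iteration.  Hypothetical
   scalar model for a 4-element MAX reduction (not used anywhere):  */
static float ATTRIBUTE_UNUSED
reduc_model (const float v[4])
{
  /* Step i = 128 -> 64: combine lanes with their upper-half partners.  */
  float a = v[0] > v[2] ? v[0] : v[2];
  float b = v[1] > v[3] ? v[1] : v[3];
  /* Final step leaves the result in lane 0.  */
  return a > b ? a : b;
}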
/* Target hook for scalar_mode_supported_p.  */

ix86_scalar_mode_supported_p (enum machine_mode mode)
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
  return default_scalar_mode_supported_p (mode);
/* Implements target hook vector_mode_supported_p.  */

ix86_vector_mode_supported_p (enum machine_mode mode)
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
/* Target hook for c_mode_for_suffix.  */

static enum machine_mode
ix86_c_mode_for_suffix (char suffix)

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
/* Worker function for REVERSE_CONDITION.  */

ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

output_387_reg_move (rtx insn, rtx *operands)
  if (REG_P (operands[0]))
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";

      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";

  else if (MEM_P (operands[0]))
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";

	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  return "fst%Z0\t%y0";
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

ix86_emit_fp_unordered_jump (rtx label)
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);

      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
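
/* Illustrative sketch only: a hypothetical scalar model of the branch
   above.  fyl2xp1 computes y * log2(x + 1) without rounding x + 1
   first, which matters when |x| is below the cutoff 1 - sqrt(2)/2
   ~= 0.2928932...; past it the plain fyl2x path on 1 + x is accurate
   enough.  Not used by the compiler.  */
static long double ATTRIBUTE_UNUSED
i387_log1p_model (long double x)
{
  const long double ln2 = 0.6931471805599453094L;	/* fldln2 */
  if (__builtin_fabsl (x) < 0.29289321881345247561L)
    return ln2 * __builtin_log2l (x + 1.0L);	/* models fyl2xp1 */
  return ln2 * __builtin_log2l (1.0L + x);	/* models fyl2x on 1 + x */
}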
/* Emit code for round calculation.  */

void ix86_emit_i387_round (rtx op0, rtx op1)
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

      gen_abs = gen_abssf2;
      gen_abs = gen_absdf2;
      gen_abs = gen_absxf2;
      gcc_unreachable ();

      gen_neg = gen_negsf2;
      gen_neg = gen_negdf2;
      gen_neg = gen_negxf2;
      gen_neg = gen_neghi2;
      gen_neg = gen_negsi2;
      gen_neg = gen_negdi2;
      gcc_unreachable ();

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));

	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));

      emit_insn (gen_frndintxf2_floor (res, tmp1));

      emit_insn (gen_lfloorxfhi2 (res, tmp1));

      emit_insn (gen_lfloorxfsi2 (res, tmp1));

      emit_insn (gen_lfloorxfdi2 (res, tmp1));

      gcc_unreachable ();

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
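
/* Illustrative sketch only: a hypothetical scalar model of the sequence
   above, round (a) = sgn (a) * floor (fabs (a) + 0.5).  The real code
   reads the sign from fxam; the comparison below approximates that and
   glosses over signed zero.  Not used by the compiler.  */
static double ATTRIBUTE_UNUSED
i387_round_model (double a)
{
  double r = __builtin_floor (__builtin_fabs (a) + 0.5);	/* frndint/floor */
  return a < 0.0 ? -r : r;	/* fxam sign bit selects the negation */
}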
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));

  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
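
/* Illustrative sketch only: a hypothetical scalar model of the
   refinement above.  With x0 the rcpss estimate of 1/b, one
   Newton-Raphson step gives x1 = 2*x0 - b*x0*x0 = x0*(2 - b*x0),
   and the quotient is a * x1.  Not used by the compiler.  */
static float ATTRIBUTE_UNUSED
swdiv_model (float a, float b, float x0 /* ~ 1/b from rcpss */)
{
  float e0 = (x0 * b) * x0;	/* e0 = b * rcp(b) * rcp(b) */
  float e1 = x0 + x0;		/* e1 = rcp(b) + rcp(b) */
  return a * (e1 - e0);		/* res = a * x1 */
}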
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
  rtx x0, e0, e1, e2, e3, mthree, mhalf;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);

    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
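
/* Illustrative sketch only: a hypothetical scalar model of the rsqrt
   refinement above.  With x0 the rsqrtss estimate of 1/sqrt(a), one
   Newton-Raphson step gives -0.5 * x0 * (a*x0*x0 - 3) for rsqrt, and
   multiplying by a instead of x0 in the last factor yields sqrt.
   Not used by the compiler.  */
static float ATTRIBUTE_UNUSED
swsqrt_model (float a, float x0 /* ~ 1/sqrt(a) from rsqrtss */, int recip)
{
  float e0 = x0 * a;			/* e0 = x0 * a */
  float e1 = e0 * x0;			/* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;			/* e2 = e1 + mthree */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* e3 = -.5 * x0 (or -.5 * e0) */
  return e2 * e3;			/* rsqrt(a) or sqrt(a) */
}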
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

i386_solaris_elf_named_section (const char *name, unsigned int flags,
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
      && strcmp (name, ".eh_frame") == 0)
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
      solaris_elf_asm_comdat_section (name, flags, decl);

  default_elf_asm_named_section (name, flags, decl);
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)

  switch (TYPE_MODE (type))
      /* __float128 is "g".  */
      /* "long double" or __float80 is "e".  */
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

asm_preferred_eh_data_format (int code, int global)
      int type = DW_EH_PE_sdata8;
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;

  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */

ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
      enum machine_mode vmode;

      if (mode == SFmode)
      else if (mode == DFmode)

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));

      mask = gen_rtx_NOT (mode, mask);
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

ix86_expand_sse_fabs (rtx op0, rtx *smask)
  enum machine_mode vmode, mode = GET_MODE (op0);

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
  else if (mode == DFmode)

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));

  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

ix86_gen_TWO52 (enum machine_mode mode)
  REAL_VALUE_TYPE TWO52r;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);
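
/* Illustrative sketch only: why 2**52 (2**23 for SFmode) works in the
   rounding sequences below.  Adding and then subtracting it leaves no
   fraction bits in the intermediate mantissa, so the value is rounded
   to an integer in the current rounding mode.  Hypothetical scalar
   model, not used by the compiler:  */
static double ATTRIBUTE_UNUSED
two52_round_model (double x /* assumed 0 <= x < 2**52 */)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  volatile double t = x + two52;	/* fraction bits are shifted out */
  return t - two52;			/* integral result, same magnitude */
}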
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */

ix86_expand_lround (rtx op0, rtx op1)
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lround from OPERAND1 storing
   into OPERAND0.  */

ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
  /* C code for the stuff we're doing below (for do_floor):
	xi -= (double)xi > op1 ? 1 : 0;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */

ix86_expand_rint (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	Using the absolute value and copying back sign makes
	-0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
	else if (dxa > 0.5)
	x2 = copysign (xa2, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */

ix86_expand_trunc (rtx operand0, rtx operand1)
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */

ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	xa2 = xa + TWO52 - TWO52;
	x2 = copysign (xa2, x);
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */

ix86_expand_round (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */

ix86_expand_round_sse4 (rtx op0, rtx op1)
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;

      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;

      gcc_unreachable ();

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
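
/* Illustrative sketch only: a hypothetical scalar model of the SSE4.1
   sequence above.  Using nextafter (0.5, 0.0) instead of 0.5 keeps
   x + 0.5 from rounding up to the next integer when x is just below
   0.5.  Not used by the compiler.  */
static double ATTRIBUTE_UNUSED
round_sse4_model (double x)
{
  double pred_half = 0.49999999999999994;	/* nextafter (0.5, 0.0) */
  double e1 = __builtin_copysign (pred_half, x);	/* e1 = copysign (0.5, x) */
  return __builtin_trunc (x + e1);	/* roundsd with ROUND_TRUNC */
}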
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },

  { NULL, 0, 0, false, false, false, NULL, false }
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype ATTRIBUTE_UNUSED,
				 int misalign ATTRIBUTE_UNUSED)
  switch (type_of_cost)
	return ix86_cost->scalar_stmt_cost;

	return ix86_cost->scalar_load_cost;

	return ix86_cost->scalar_store_cost;

	return ix86_cost->vec_stmt_cost;

	return ix86_cost->vec_align_load_cost;

	return ix86_cost->vec_store_cost;

      case vec_to_scalar:
	return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
	return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
	return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_promote_demote:
	return ix86_cost->vec_stmt_cost;

	gcc_unreachable ();
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

init_vselect_insn (void)
  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  vselect_insn = emit_insn (x);
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

expand_vselect (rtx target, rtx op0, const unsigned char *perm,
		unsigned nelt, bool testing_p)
  rtx x, save_vconcat;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt,
  enum machine_mode v2mode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

expand_vec_perm_blend (struct expand_vec_perm_d *d)
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;

      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;

      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);

      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);

      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;

      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);

      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])

	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;

      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		target = gen_lowpart (vmode, target);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;

      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);

      gcc_unreachable ();

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
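
/* Illustrative sketch only: how the immediate blend masks above are
   built.  Bit i of MASK selects op0 (0) or op1 (1) for element i of
   the result.  Hypothetical stand-alone model mirroring the V8HI
   case, not used by the compiler:  */
static unsigned ATTRIBUTE_UNUSED
blend_mask_model (const unsigned char perm[8], unsigned nelt /* == 8 */)
{
  unsigned i, mask = 0;
  for (i = 0; i < nelt; ++i)
    mask |= (perm[i] >= nelt) << i;	/* element taken from op1 sets bit i */
  return mask;
}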
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
  rtx rperm[8], vperm;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)

  for (i = 0; i < 8; ++i)
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */

      rperm[i] = GEN_INT (e);

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))

  if (GET_MODE_NUNITS (vmode) >= d->nelt)

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
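
/* Illustrative sketch only: the chunk test above.  A permutation on
   d->nelt narrow elements is expressible as a permutation of wider
   elements when each chunk starts on a chunk boundary and stays
   contiguous.  Hypothetical stand-alone model, not used by the
   compiler:  */
static int ATTRIBUTE_UNUSED
chunked_perm_model (const unsigned char *perm, unsigned nelt, unsigned chunk)
{
  unsigned i, j;
  for (i = 0; i < nelt; i += chunk)
    {
      if (perm[i] & (chunk - 1))	/* must start chunk-aligned */
	return 0;
      for (j = 1; j < chunk; ++j)
	if (perm[i] + j != perm[i + j])	/* must stay contiguous */
	  return 0;
    }
  return 1;
}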
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  if (!d->one_operand_p)
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	  && valid_perm_using_mode_p (V2TImode, d))
	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
			   || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));

      if (GET_MODE_SIZE (d->vmode) == 16)
      else if (GET_MODE_SIZE (d->vmode) == 32)
	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V8SFmode)

	  if (vmode == V32QImode)
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  if (vmode == V8SImode && d->vmode == V8SFmode)
    vperm = gen_lowpart (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));

      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

expand_vec_perm_1 (struct expand_vec_perm_d *d)
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	  perm2[i] = d->perm[i] & mask;
	    identity_perm = false;
	    broadcast_perm = false;

	  emit_move_insn (d->target, d->op0);
      else if (broadcast_perm && TARGET_AVX2)
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	      gen = gen_avx2_pbroadcastv32qi_1;
	      gen = gen_avx2_pbroadcastv16hi_1;
	      gen = gen_avx2_pbroadcastv8si_1;
	      gen = gen_avx2_pbroadcastv16qi;
	      gen = gen_avx2_pbroadcastv8hi;
	      gen = gen_avx2_vec_dupv8sf_1;
	      /* For other modes prefer other shuffles this function creates.  */
	      emit_insn (gen (d->target, d->op0));

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
	  for (i = 0; i < nelt; i += 4)
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
      for (i = 0; i < nelt; ++i)
	  unsigned e = d->perm[i];

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
  unsigned char perm2[MAX_VECT_LEN];

  if (d->vmode != V8HImode || !d->one_operand_p)

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

expand_vec_perm_palignr (struct expand_vec_perm_d *d)
  unsigned i, nelt = d->nelt;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
      unsigned e = d->perm[i];
  if (min == 0 || max - min >= nelt)

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  for (i = 0; i < nelt; ++i)
      unsigned e = d->perm[i] - min;

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */

  ok = expand_vec_perm_1 (d);
36785 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
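/* Illustrative sketch (not part of GCC; compiled out): the window test
   used by expand_vec_perm_palignr above.  With nelt == 16 and every
   selector index in [3, 18], min == 3 and max - min == 15 < nelt, so one
   palignr shifting by 3 elements reduces the problem to a one-operand
   shuffle; min == 0 or a window wider than nelt cannot be handled.  */
#if 0
static int
example_palignr_window_ok (const unsigned char *perm, unsigned nelt)
{
  unsigned i, min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      if (perm[i] < min)
        min = perm[i];
      if (perm[i] > max)
        max = perm[i];
    }
  return min != 0 && max - min < nelt;
}
#endif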
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->one_operand_p.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->one_operand_p);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->one_operand_p)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              char tmph = nonzero_halves[0];
              nonzero_halves[0] = nonzero_halves[1];
              nonzero_halves[1] = tmph;
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->one_operand_p)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
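/* Illustrative sketch (not part of GCC; compiled out): the CONTENTS mask
   driving the dispatch above.  Each selector element sets one bit of a
   2*nelt-bit mask.  For nelt == 4 (V4SI) and selector { 0, 4, 1, 5 },
   contents == 0x33; with h1 == 0x03 and h3 == 0x30 the test
   (contents & (h1 | h3)) == contents holds, so one punpckl* gathers every
   needed element into a single vector.  */
#if 0
static unsigned long long
example_contents (const unsigned char *perm, unsigned nelt)
{
  unsigned long long contents = 0;
  unsigned i;
  for (i = 0; i < nelt; ++i)
    contents |= 1ULL << perm[i];  /* one bit per referenced input element */
  return contents;
}
#endif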
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
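/* Illustrative sketch (not part of GCC; compiled out): the quarter-count
   test above.  vpermq can place only two source quarters into each
   destination half, so each half of the selector may reference at most
   two of the four 64-bit quarters; contents[k] holds one bit per quarter
   referenced by half k.  */
#if 0
static int
example_half_quarters_ok (unsigned contents_half)
{
  unsigned cnt = 0, j;
  for (j = 0; j < 4; ++j)
    if (contents_half & (1u << j))
      ++cnt;
  return cnt <= 2;  /* more than two quarters cannot be gathered */
}
#endif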
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
        {
          /* The second shuffle for e.g. V4DFmode has
             0123 and ABCD operands.
             Ignore AB23, as 23 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (1 << 2)) continue;
          /* And 01CD, as 01 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 0) continue;
          /* And 4567, as then the vperm2[fi]128 doesn't change
             anything on the original 4567 second operand.  */
          if ((perm & 0xf) == ((3 << 2) | 2)) continue;
        }
      else
        {
          /* The second shuffle for e.g. V4DFmode has
             4567 and ABCD operands.
             Ignore AB67, as 67 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (3 << 2)) continue;
          /* And 45CD, as 45 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 2) continue;
          /* And 0123, as then the vperm2[fi]128 doesn't change
             anything on the original 0123 first operand.  */
          if ((perm & 0xf) == (1 << 2)) continue;
        }

      for (i = 0; i < nelt; i++)
        {
          j = d->perm[i] / nelt2;
          if (j == ((perm >> (2 * (i >= nelt2))) & 3))
            dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
          else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
            dsecond.perm[i] = d->perm[i] & (nelt - 1);
          else
            break;
        }

      if (i == nelt)
        {
          start_sequence ();
          ok = expand_vec_perm_1 (&dsecond);
          end_sequence ();
        }
      else
        ok = false;

      if (ok)
        {
          if (d->testing_p)
            return true;

          /* Found a usable second shuffle.  dfirst will be
             vperm2f128 on d->op0 and d->op1.  */
          dsecond.testing_p = false;
          dfirst = *d;
          dfirst.target = gen_reg_rtx (d->vmode);
          for (i = 0; i < nelt; i++)
            dfirst.perm[i] = (i & (nelt2 - 1))
                             + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

          ok = expand_vec_perm_1 (&dfirst);
          gcc_assert (ok);

          /* And dsecond is some single insn shuffle, taking
             d->op0 and result of vperm2f128 (if perm < 16) or
             d->op1 and result of vperm2f128 (otherwise).  */
          dsecond.op1 = dfirst.target;
          if (perm >= 16)
            dsecond.op0 = dfirst.op1;

          ok = expand_vec_perm_1 (&dsecond);
          gcc_assert (ok);

          return true;
        }

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
        return false;
    }

  return false;
}
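/* Illustrative sketch (not part of GCC; compiled out): the immediate
   packing described above.  The candidate PERM carries two 2-bit lane
   selectors; ((perm << 2) | perm) & 0x33 places them in bits 0-1 and 4-5
   of the vperm2[fi]128 immediate.  E.g. perm == 9 (binary 10 01) packs to
   0x21: destination low lane from source lane 1, high lane from source
   lane 2.  */
#if 0
static unsigned
example_vperm2f128_imm (unsigned perm)
{
  return ((perm << 2) | perm) & 0x33;
}
#endif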
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv32qi;
      else
        gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv16hi;
      else
        gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8si;
      else
        gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4di;
      else
        gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8sf;
      else
        gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4df;
      else
        gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
        return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
        msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
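/* Illustrative sketch (not part of GCC; compiled out): the blend mask
   computed above for V4DF with nelt2 == 2.  For the one-operand selector
   { 0, 3, 2, 1 }, elements 1 and 3 must come from the lane-swapped copy,
   so msk == 0xa and the final vblendpd keeps elements 0 and 2 from the
   in-lane shuffle.  */
#if 0
static unsigned
example_vblend_mask (const unsigned char perm[4])
{
  unsigned i, j, msk = 0;
  for (i = 0; i < 4; i++)
    {
      j = (perm[i] & 2) ? i | 2 : i & ~2u;  /* slot holding perm[i] */
      if (j != i)
        msk |= 1u << i;                     /* element taken after lane swap */
    }
  return msk;
}
#endif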
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i * eltsz + j] = GEN_INT (e * eltsz + j);
          rperm[1 - which][i * eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
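/* Illustrative sketch (not part of GCC; compiled out): building the two
   byte masks above without rtx.  For each destination byte, one operand's
   mask receives the source byte index and the other receives -128, whose
   bit 7 makes pshufb write zero, so the final por merges the two partial
   results.  */
#if 0
static void
example_pshufb2_masks (const unsigned char *perm, unsigned nelt,
                       unsigned eltsz, signed char mask0[16],
                       signed char mask1[16])
{
  unsigned i, j;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = perm[i];
      unsigned which = (e >= nelt);     /* element sourced from op1?  */
      if (which)
        e -= nelt;
      for (j = 0; j < eltsz; ++j)
        {
          (which ? mask1 : mask0)[i * eltsz + j] = e * eltsz + j;
          (which ? mask0 : mask1)[i * eltsz + j] = -128;  /* zero the lane */
        }
    }
}
#endif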
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
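/* Illustrative sketch (not part of GCC; compiled out): the XOR-by-WHICH
   trick above, byte-element case (eltsz == 1, nelt == 32).  WHICH is 0 or
   16, the byte distance to the other 128-bit lane; XORing the mask slot
   with WHICH stores the entry in the opposite lane so the later lane swap
   puts the byte where the selector asked.  E.g. i == 3, sel == 19:
   which == 16 and slot 3 ^ 16 == 19 of the swapped-lane mask receives
   source byte 19 & 15 == 3.  */
#if 0
static unsigned
example_xlane_mask_slot (unsigned i, unsigned sel)
{
  unsigned which = (sel ^ i) & 16;  /* crosses the 128-bit lane?  */
  return i ^ which;                 /* mask slot that receives the byte */
}
#endif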
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operand
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave.  */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          d_copy.target = gen_lowpart (V4DFmode, d->target);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          d_copy.target = gen_lowpart (V8SFmode, d->target);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
                                           gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
                                          gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
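/* Illustrative sketch (not part of GCC; compiled out): the selectors the
   matcher above accepts.  Extract-even/odd across both operands is
   exactly { odd, 2 + odd, 4 + odd, ... } with odd in {0, 1}; e.g. for
   V8HI, { 1, 3, 5, 7, 9, 11, 13, 15 } is extract-odd.  */
#if 0
static int
example_classify_even_odd (const unsigned char *perm, unsigned nelt)
{
  unsigned i, odd = perm[0];
  if (odd != 0 && odd != 1)
    return -1;                  /* not an even/odd extraction */
  for (i = 1; i < nelt; ++i)
    if (perm[i] != 2 * i + odd)
      return -1;
  return (int) odd;             /* 0 = extract-even, 1 = extract-odd */
}
#endif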
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          rtx dest;
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
                           d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
        rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
        {
          h[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
                                    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
        continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
                                      const2_rtx, GEN_INT (3), const0_rtx,
                                      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
        {
          l[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
        {
          op = gen_reg_rtx (V32QImode);
          emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
          l[i] = op;
        }
      else if (h[i])
        l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  d.one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        {
          d.one_operand_p = false;
          break;
        }
      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = d.op1;
      break;

    case 1:
      d.op1 = d.op0;
      break;
    }

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (which == 3 && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
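/* Illustrative sketch (not part of GCC; compiled out): the selector
   arithmetic above.  The bit position is divided by the field size to
   obtain the element index, and the pinsr expander is handed that index
   as a one-hot immediate: size == 16, pos == 32 selects halfword 2, i.e.
   immediate 1 << 2.  */
#if 0
static unsigned
example_pinsr_immediate (unsigned size, unsigned pos)
{
  return 1u << (pos / size);  /* one-hot element selector */
}
#endif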
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
        {
        default:
          break;

        case 0:
          *ptree = ms_va_list_type_node;
          *pname = "__builtin_ms_va_list";
          return 1;

        case 1:
          *ptree = sysv_va_list_type_node;
          *pname = "__builtin_sysv_va_list";
          return 1;
        }
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in a dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path {
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s {
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, either 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation is encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch ( GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
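/* Illustrative sketch (not part of GCC; compiled out): the size returned
   above weights each 32-bit immediate at 4 bytes and each 64-bit one at
   8, so an insn with one SImode and one DImode immediate charges
   1*4 + 1*8 == 12 bytes against the window's MAX_IMM_SIZE budget.  */
#if 0
static int
example_imm_size (int imm32, int imm64)
{
  return imm32 * 4 + imm64 * 8;
}
#endif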
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
38941 last window scheduled. */
38944 fits_dispatch_window (rtx insn
)
38946 dispatch_windows
*window_list
= dispatch_window_list
;
38947 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
38948 unsigned int num_restrict
;
38949 enum dispatch_group group
= get_insn_group (insn
);
38950 enum insn_path path
= get_insn_path (insn
);
38953 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
38954 instructions should be given the lowest priority in the
38955 scheduling process in Haifa scheduler to make sure they will be
38956 scheduled in the same dispatch window as the refrence to them. */
38957 if (group
== disp_jcc
|| group
== disp_cmp
)
38960 /* Check nonrestricted. */
38961 if (group
== disp_no_group
|| group
== disp_branch
)
38964 /* Get last dispatch window. */
38965 if (window_list_next
)
38966 window_list
= window_list_next
;
38968 if (window_list
->window_num
== 1)
38970 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
38973 || (min_insn_size (insn
) + sum
) >= 48)
38974 /* Window 1 is full. Go for next window. */
38978 num_restrict
= count_num_restricted (insn
, window_list
);
38980 if (num_restrict
> num_allowable_groups
[group
])
38983 /* See if it fits in the first window. */
38984 if (window_list
->window_num
== 0)
38986 /* The first widow should have only single and double path
38988 if (path
== path_double
38989 && (window_list
->num_uops
+ 2) > MAX_INSN
)
38991 else if (path
!= path_single
)
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached, do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
  fprintf (file, "  num_loads = %d, num_stores = %d\n",
	   list->num_loads, list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file,
	       "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
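/* Editor's sketch (not from the original sources): do_dispatch and
   has_dispatch are the scheduler-facing entry points.  A typical
   driver sequence would be

     do_dispatch (NULL_RTX, DISPATCH_INIT);
     ...
     if (has_dispatch (insn, IS_DISPATCH_ON)
	 && has_dispatch (insn, FITS_DISPATCH_WINDOW))
       do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);

   where insn is the insn just issued by the Haifa scheduler.  */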
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of the reassociation_width target hook used by the
   reassoc phase to identify parallelism level in a reassociated
   tree.  The statement's tree_code is passed in OPC.  The arguments'
   type is passed in MODE.

   Currently parallel reassociation is enabled for Atom processors
   only and we set the reassociation width to be 2 because Atom may
   issue up to 2 instructions per cycle.

   The return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
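/* Editor's example (not from the original sources): with a width of 2,
   the reassoc pass may rewrite the serial chain

     x = ((a + b) + c) + d;

   into the balanced form

     t1 = a + b;  t2 = c + d;  x = t1 + t2;

   so that the two leading additions can issue in the same cycle on a
   2-wide machine such as Atom.  */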
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}
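/* Editor's example (not from the original sources): for a loop over
   'double' data on an AVX target built without -mprefer-avx128, the
   hook returns V4DFmode and the vectorizer works on 256-bit ymm
   vectors holding 4 doubles; with -mprefer-avx128, or with only SSE2
   available, it returns V2DFmode (128-bit xmm, 2 doubles); and when
   the TARGET_VECTORIZE_DOUBLE tuning flag is clear, word_mode is
   returned and doubles stay scalar.  */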
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
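/* Editor's note (not from the original sources): the value returned is
   a bitmask of candidate vector sizes in bytes; 32 | 16 lets the
   autovectorizer try 256-bit vectors and fall back to 128-bit ones,
   while 0 means only the size implied by the preferred SIMD mode is
   tried.  */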
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  unsigned HOST_WIDE_INT strong;

  if (val & ~(unsigned HOST_WIDE_INT) (IX86_HLE_ACQUIRE | IX86_HLE_RELEASE
				       | MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
	       "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
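/* Editor's example (not from the original sources): the HLE bits
   validated here arrive through the memory-model argument of the
   __atomic builtins in user code, e.g.

     while (__atomic_exchange_n (&lock, 1,
				 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining __ATOMIC_HLE_ACQUIRE with a model weaker than ACQUIRE (or
   __ATOMIC_HLE_RELEASE with one weaker than RELEASE) triggers the
   warnings above and falls back to a seq-cst model.  */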
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"