/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
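/* The pass below calls alloc_aux_for_blocks (sizeof (struct
   block_info_def)), so each basic block's ->aux field points at one of
   these records and BLOCK_INFO is the accessor for it; for example,
   BLOCK_INFO (bb)->state reads the AVX upper-128bit state recorded
   for BB.  */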
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
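/* Illustration (not verbatim from this file): the call expanders encode
   one of these values as the operand of the vzeroupper UNSPEC, along the
   lines of

     emit_insn (gen_avx_vzeroupper (GEN_INT (callee_pass_avx256)));

   and move_or_delete_vzeroupper_2 below recovers that value with
   INTVAL (XVECEXP (pat, 0, 0)).  */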
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
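/* check_avx256_stores has the note_stores callback signature
   (rtx, const_rtx, void *); move_or_delete_vzeroupper_2 below applies
   it to every store in an insn pattern via

     note_stores (pat, check_avx256_stores, &state);

   so STATE flips to USED once any store writes a 256bit AVX register.  */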
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  break;
	}
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
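/* The predecessor walk above is this pass's data-flow meet:

     any predecessor UNKNOWN -> UNKNOWN  (block left unprocessed, rescanned)
     else any predecessor USED -> USED
     else -> UNUSED

   with UNKNOWN_IS_UNUSED demoting UNKNOWN predecessors to UNUSED, which
   is how the final cleanup pass forces every block to converge.  */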
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
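/* A sketch of the driver above: PENDING holds blocks for the next
   data-flow round and WORKLIST the current one, both keyed by BB_ORDER
   (reverse completion order), with IN_PENDING/IN_WORKLIST tracking heap
   membership.  Each round swaps the two heap/bitmap pairs, drains
   WORKLIST, and pushes successors of changed blocks either into the
   current round (if not yet visited) or into the next one; iteration
   stops as soon as a round completes without setting
   rescan_vzeroupper_p.  */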
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
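/* Usage sketch (field name per struct processor_costs): the tables
   below are subscripted as, e.g.,

     cost->mult_init[MODE_INDEX (mode)]

   and index 4 ("other") catches anything wider than DImode, such as a
   TImode multiply.  */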
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
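/* With COSTS_N_INSNS (N) == (N) * 4 as assumed above,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1): a 2-byte add weighs the
   same when tuning for size as one add does when tuning for speed, so
   the byte-count and insn-count scales stay comparable.  */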
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  0,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  1,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  1,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
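/* Reading the stringop entries above: an initializer such as

     {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}

   gives the algorithm used when the size is unknown plus a
   {max_size, algorithm} table whose -1 entry covers all remaining
   sizes; so when tuning for size every memcpy/memset is done with a
   rep-prefixed byte move.  */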
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/* HI */
   COSTS_N_INSNS (6),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (23),		/* SI */
   COSTS_N_INSNS (23),		/* DI */
   COSTS_N_INSNS (23)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/* HI */
   COSTS_N_INSNS (12),		/* SI */
   COSTS_N_INSNS (12),		/* DI */
   COSTS_N_INSNS (12)},		/* other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/* HI */
   COSTS_N_INSNS (40),		/* SI */
   COSTS_N_INSNS (40),		/* DI */
   COSTS_N_INSNS (40)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  4,				/* size of l1 cache.  486 has 8kB cache
				   shared for code and data, so 4kB is
				   not really precise.  */
  4,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/* HI */
   COSTS_N_INSNS (11),		/* SI */
   COSTS_N_INSNS (11),		/* DI */
   COSTS_N_INSNS (11)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/* HI */
   COSTS_N_INSNS (25),		/* SI */
   COSTS_N_INSNS (25),		/* DI */
   COSTS_N_INSNS (25)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  8,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (4)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/* HI */
   COSTS_N_INSNS (17),		/* SI */
   COSTS_N_INSNS (17),		/* DI */
   COSTS_N_INSNS (17)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (7),		/* SI */
   COSTS_N_INSNS (7),		/* DI */
   COSTS_N_INSNS (7)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (39),		/* SI */
   COSTS_N_INSNS (39),		/* DI */
   COSTS_N_INSNS (39)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */

  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  128,				/* size of l2 cache.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/* HI */
   COSTS_N_INSNS (18),		/* SI */
   COSTS_N_INSNS (18),		/* DI */
   COSTS_N_INSNS (18)},		/* other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  32,				/* size of l2 cache.  Some models
				   have integrated l2 cache, but
				   optimizing for k6 is not important
				   enough to worry about that.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/* HI */
   COSTS_N_INSNS (5),		/* SI */
   COSTS_N_INSNS (5),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  5,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
				    MOVD reg32, xmmreg Double FADD 3 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
				    MOVD reg32, xmmreg Double FADD 3 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
				    MOVD reg32, xmmreg Double FADD 3 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
				    MOVD reg32, xmmreg Double FADD 3 */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/* HI */
   COSTS_N_INSNS (15),		/* SI */
   COSTS_N_INSNS (15),		/* DI */
   COSTS_N_INSNS (15)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/* HI */
   COSTS_N_INSNS (56),		/* SI */
   COSTS_N_INSNS (56),		/* DI */
   COSTS_N_INSNS (56)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/* HI */
   COSTS_N_INSNS (10),		/* SI */
   COSTS_N_INSNS (10),		/* DI */
   COSTS_N_INSNS (10)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/* HI */
   COSTS_N_INSNS (66),		/* SI */
   COSTS_N_INSNS (66),		/* DI */
   COSTS_N_INSNS (66)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			    other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			    other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
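
/* Each m_* macro is a one-hot bit (or union of bits) indexed by the
   PROCESSOR_* enum, so testing whether a tuning applies to the selected CPU
   is a plain bit test.  An illustrative sketch, assuming ix86_tune has
   already been set and apply_p4_nocona_tuning is a hypothetical helper:  */
#if 0
  unsigned int tune_mask = 1 << ix86_tune;   /* e.g. 1 << PROCESSOR_NOCONA */
  if (m_P4_NOCONA & tune_mask)               /* i.e. m_PENT4 | m_NOCONA */
    apply_p4_nocona_tuning ();
#endif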
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10
  | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
     on 16-bit immediate moves into memory on Core2 and Corei7.  */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     show that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM
};
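
/* ix86_tune_features[] is derived from the table above by masking each
   entry with the bit of the CPU selected by -mtune; a sketch of the
   derivation performed in ix86_option_override_internal below:  */
#if 0
  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#endif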
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
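
/* These masks enable -m flags the user did not set explicitly; a sketch of
   how x86_avx256_split_unaligned_load feeds the corresponding target flag
   during option override (target_flags_explicit records what was given on
   the command line):  */
#if 0
  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
#endif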
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
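
/* Worked example of the map above: gcc regno 6 is %ebp and
   svr4_dbx_register_map[6] == 5, matching "5 for %ebp (gcc regno = 6)"
   in the comment; likewise gcc regno 7 (%esp) maps to DWARF regno 4.  */
#if 0
  /* hard_regno is a hypothetical gcc hard register number.  */
  int dwarf_regno = svr4_dbx_register_map[hard_regno];
#endif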
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
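
/* Example: for a SysV x86-64 call f(a, b, c, d) with integer arguments, the
   four values go in %rdi, %rsi, %rdx and %rcx per the first table above,
   while the same call under the MS ABI uses %rcx, %rdx, %r8 and %r9.
   A sketch of the indexing (nth_int_arg is a hypothetical running count of
   integer arguments assigned so far):  */
#if 0
  int regno = (call_abi == MS_ABI
	       ? x86_64_ms_abi_int_parameter_registers[nth_int_arg]
	       : x86_64_int_parameter_registers[nth_int_arg]);
#endif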
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]
					<- reg_save_offset
   [padding0]

   [saved SSE regs]
					<- sse_regs_save_offset
   [padding1]          |
		       |		<- FRAME_POINTER
   [va_arg registers]  |
		       |
   [frame]	       |
		       |
   [padding2]	       | = to_allocate
					<- STACK_POINTER
  */
struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if SSE prefetch instruction is not NOOP.  */
int x86_prefetch_sse;
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};

static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
{
  /* ... */
};
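
/* processor_target_table is indexed by the PROCESSOR_* value chosen from
   -mtune; a sketch of how its fields are consumed later during option
   override (align_loops etc. are the generic GCC alignment knobs):  */
#if 0
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (align_loops == 0)
    align_loops = processor_target_table[ix86_tune].align_loop;
#endif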
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options are matched first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mhle",		OPTION_MASK_ISA_HLE },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
		| OPTION_MASK_ABI_64
		| OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    if ((isa & isa_opts[i].mask) != 0)
      {
	opts[num++][0] = isa_opts[i].option;
	isa &= ~ isa_opts[i].mask;
      }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    if ((flags & flag_opts[i].mask) != 0)
      {
	opts[num++][0] = flag_opts[i].option;
	flags &= ~ flag_opts[i].mask;
      }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;
	}

      if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	{
	  *ptr++ = '\n';
	  line_len = 0;
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  Note: for x86 the "hotfix" style is not supported
   and is rejected with sorry().  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;

#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
/* if this reaches 64, need to widen struct pta flags below */
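
/* The PTA_* bits compose per-CPU ISA sets for the alias table below; e.g.
   PTA_MMX | PTA_SSE | PTA_SSE2 sets bits 14, 21 and 22 of the flags word.
   A sketch of testing one bit, mirroring the loop at the end of this
   function:  */
#if 0
  if (processor_alias_table[i].flags & PTA_SSE2)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE2;	/* unless set explicitly */
#endif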
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, CPU_NONE, 0},
      {"i486", PROCESSOR_I486, CPU_NONE, 0},
      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2},
      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_CX16 | PTA_NO_SAHF},
      {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_CX16},
      {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
      {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
      {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
	| PTA_RDRND | PTA_F16C},
      {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
	| PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE},
      {"atom", PROCESSOR_ATOM, CPU_ATOM,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
      {"k8", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"opteron", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"athlon64", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
	| PTA_XOP | PTA_LWP},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
	| PTA_FMA},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
      {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
	PTA_HLE /* flags are only used for -march switch.  */ },
      {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
	PTA_64BIT
	| PTA_HLE /* flags are only used for -march switch.  */ },
    };
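
  /* Example resolution: -march=corei7-avx matches the entry above that
     selects PROCESSOR_COREI7_64 with CPU_COREI7 scheduling and implies MMX
     through AVX plus CX16, POPCNT, AES and PCLMUL; the loop near the end of
     this function then ORs the matching OPTION_MASK_ISA_* bits into
     ix86_isa_flags unless the user overrode them.  */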
  /* -mrecip options.  */
  static struct
    {
      const char *string;		/* option name */
      unsigned int mask;		/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
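
  /* recip_options is consulted when parsing -mrecip=<opt>; a sketch of the
     matching loop (the real parsing later in this function also understands
     an "!" inversion prefix), with str a hypothetical option substring:  */
#if 0
  for (i = 0; i < ARRAY_SIZE (recip_options); i++)
    if (!strcmp (str, recip_options[i].string))
      {
	recip_mask |= recip_options[i].mask;
	break;
      }
#endif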
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
    ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32)
	ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64.  */
      if (TARGET_LP64)
	ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif

  if (TARGET_X32)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_LP64)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;

  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3334 for (i
= 0; i
< pta_size
; i
++)
3335 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3337 ix86_schedule
= processor_alias_table
[i
].schedule
;
3338 ix86_arch
= processor_alias_table
[i
].processor
;
3339 /* Default cpu tuning to the architecture. */
3340 ix86_tune
= ix86_arch
;
3342 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3343 error ("CPU you selected does not support x86-64 "
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);
  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When a scheduling description is not available, disable the scheduler
     pass so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable the vzeroupper optimization
	 for TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable the vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}
/* Return TRUE if VAL is passed in a register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }
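
  /* Values above 1 in the initial register tables encode mode-dependent
     defaults: 2 means fixed (or call-used) only in 32-bit mode and 3
     means only in 64-bit mode; the comparisons above fold them back to
     plain 0/1 for the mode actually being compiled for.  */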
  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
4516 /* Hook to validate attribute((target("string"))). */
4519 ix86_valid_target_attribute_p (tree fndecl
,
4520 tree
ARG_UNUSED (name
),
4522 int ARG_UNUSED (flags
))
4524 struct cl_target_option cur_target
;
4526 tree old_optimize
= build_optimization_node ();
4527 tree new_target
, new_optimize
;
4528 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4530 /* If the function changed the optimization levels as well as setting target
4531 options, start with the optimizations specified. */
4532 if (func_optimize
&& func_optimize
!= old_optimize
)
4533 cl_optimization_restore (&global_options
,
4534 TREE_OPTIMIZATION (func_optimize
));
4536 /* The target attributes may also change some optimization flags, so update
4537 the optimization options if necessary. */
4538 cl_target_option_save (&cur_target
, &global_options
);
4539 new_target
= ix86_valid_target_attribute_tree (args
);
4540 new_optimize
= build_optimization_node ();
4547 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4549 if (old_optimize
!= new_optimize
)
4550 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4553 cl_target_option_restore (&global_options
, &cur_target
);
4555 if (old_optimize
!= new_optimize
)
4556 cl_optimization_restore (&global_options
,
4557 TREE_OPTIMIZATION (old_optimize
));
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does, then it is not ok
     to inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's ISA options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function, but an SSE2 function
	 can't inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
4993 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4995 int flags ATTRIBUTE_UNUSED
,
4998 if (TREE_CODE (*node
) != FUNCTION_TYPE
4999 && TREE_CODE (*node
) != METHOD_TYPE
5000 && TREE_CODE (*node
) != FIELD_DECL
5001 && TREE_CODE (*node
) != TYPE_DECL
)
5003 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5005 *no_add_attrs
= true;
5009 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5010 if (is_attribute_p ("regparm", name
))
5014 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5016 error ("fastcall and regparm attributes are not compatible");
5019 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5021 error ("regparam and thiscall attributes are not compatible");
5024 cst
= TREE_VALUE (args
);
5025 if (TREE_CODE (cst
) != INTEGER_CST
)
5027 warning (OPT_Wattributes
,
5028 "%qE attribute requires an integer constant argument",
5030 *no_add_attrs
= true;
5032 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5034 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5036 *no_add_attrs
= true;
5044 /* Do not warn when emulating the MS ABI. */
5045 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5046 && TREE_CODE (*node
) != METHOD_TYPE
)
5047 || ix86_function_type_abi (*node
) != MS_ABI
)
5048 warning (OPT_Wattributes
, "%qE attribute ignored",
5050 *no_add_attrs
= true;
5054 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5055 if (is_attribute_p ("fastcall", name
))
5057 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5059 error ("fastcall and cdecl attributes are not compatible");
5061 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5063 error ("fastcall and stdcall attributes are not compatible");
5065 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5067 error ("fastcall and regparm attributes are not compatible");
5069 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5071 error ("fastcall and thiscall attributes are not compatible");
5075 /* Can combine stdcall with fastcall (redundant), regparm and
5077 else if (is_attribute_p ("stdcall", name
))
5079 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5081 error ("stdcall and cdecl attributes are not compatible");
5083 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5085 error ("stdcall and fastcall attributes are not compatible");
5087 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5089 error ("stdcall and thiscall attributes are not compatible");
5093 /* Can combine cdecl with regparm and sseregparm. */
5094 else if (is_attribute_p ("cdecl", name
))
5096 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5098 error ("stdcall and cdecl attributes are not compatible");
5100 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5102 error ("fastcall and cdecl attributes are not compatible");
5104 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5106 error ("cdecl and thiscall attributes are not compatible");
5109 else if (is_attribute_p ("thiscall", name
))
5111 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5112 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5114 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5116 error ("stdcall and thiscall attributes are not compatible");
5118 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5120 error ("fastcall and thiscall attributes are not compatible");
5122 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5124 error ("cdecl and thiscall attributes are not compatible");
5128 /* Can combine sseregparm with all attributes. */
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

static bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, which depends on
   the ABI in use.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fndecl, specifying the
   call abi used.  */
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of the call-ABI-switching target hook.  Sets the call
   register sets specific to FNDECL.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function context
   since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    cum->nregs = (cum->call_abi == SYSV_ABI
		  ? X86_64_REGPARM_MAX
		  : X86_64_MS_REGPARM_MAX);
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	cum->sse_nregs = (cum->call_abi == SYSV_ABI
			  ? X86_64_SSE_REGPARM_MAX
			  : X86_64_MS_SSE_REGPARM_MAX);
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     instead of conservative.
     FIXME: once typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else if ((size == 8 || size == 16) && !TARGET_SSE)
		  {
		    static bool warnedsse;

		    if (cum && !warnedsse && cum->warn_sse)
		      {
			warnedsse = true;
			warning (0, "SSE vector argument without SSE "
				 "enabled changes the ABI");
		      }
		    return mode;
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
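/* Illustrative example (not part of GCC): for

       struct s { int i; float f; };

   both fields share the first eightbyte, so INTEGERSI is merged with
   SSESF; by rule #4 the result is INTEGERSI and the struct travels in
   a general purpose register rather than an SSE register.  */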
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (X86_64_INTEGER_CLASS,
					   classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
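/* Illustrative example (not part of GCC): for

       struct s { double d; int i; };

   classify_argument fills classes[0] = X86_64_SSEDF_CLASS for the
   eightbyte holding D and classes[1] = X86_64_INTEGERSI_CLASS for the
   one holding I, and returns 2; the struct is then passed in one SSE
   and one integer register.  */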
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
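/* Illustrative usage (not part of GCC): for the struct from the
   previous example,

       int ir, sr;
       if (examine_argument (BLKmode, type, 0, &ir, &sr))
	 ;  /* ir == 1 and sr == 1: one GPR plus one SSE register.  */

   a zero return would instead force the memory path.  */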
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we
	     don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
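/* Illustrative result (not part of GCC): for the two-eightbyte struct
   above, construct_container builds a PARALLEL of the shape

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		  (expr_list (reg:SI di) (const_int 8))])

   i.e. one EXPR_LIST entry per eightbyte giving the register and the
   byte offset of the piece within the argument.  */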
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
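/* Illustrative consequences (not part of GCC): under the MS 64-bit
   convention a 3-byte struct

       struct s3 { char c[3]; };

   has a size outside {1,2,4,8} and is therefore passed by reference,
   as is any __m128 value, while a plain double still travels by value
   in a register.  */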
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
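/* Illustrative example (not part of GCC): an 8-byte struct is returned
   in RAX (its size is in [1248]), a 12-byte struct is returned in
   memory, and a __m128 comes back in XMM0 via the test above.  */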
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
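/* Layout sketch of the register save area emitted above (illustrative;
   the offsets follow from the two loops): six 8-byte GPR slots first,
   then the 16-byte SSE slots, e.g. with full gpr/fpr sizes

       bytes   0 ..  47   rdi rsi rdx rcx r8 r9
       bytes  48 .. 175   xmm0 .. xmm7

   which is exactly what gp_offset/fp_offset index into at va_arg
   time.  */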
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
7845 /* Implement va_start. */
7848 ix86_va_start (tree valist
, rtx nextarg
)
7850 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7851 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7852 tree gpr
, fpr
, ovf
, sav
, t
;
7856 if (flag_split_stack
7857 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7859 unsigned int scratch_regno
;
7861 /* When we are splitting the stack, we can't refer to the stack
7862 arguments using internal_arg_pointer, because they may be on
7863 the old stack. The split stack prologue will arrange to
7864 leave a pointer to the old stack arguments in a scratch
7865 register, which we here copy to a pseudo-register. The split
7866 stack prologue can't set the pseudo-register directly because
7867 it (the prologue) runs before any registers have been saved. */
7869 scratch_regno
= split_stack_prologue_scratch_regno ();
7870 if (scratch_regno
!= INVALID_REGNUM
)
7874 reg
= gen_reg_rtx (Pmode
);
7875 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7878 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7882 push_topmost_sequence ();
7883 emit_insn_after (seq
, entry_of_function ());
7884 pop_topmost_sequence ();
7888 /* Only 64bit target needs something special. */
7889 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7891 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7892 std_expand_builtin_va_start (valist
, nextarg
);
7897 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7898 next
= expand_binop (ptr_mode
, add_optab
,
7899 cfun
->machine
->split_stack_varargs_pointer
,
7900 crtl
->args
.arg_offset_rtx
,
7901 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7902 convert_move (va_r
, next
, 0);
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
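
/* Worked example (illustrative, not from the original source): for
     int f (const char *fmt, int a, double d, ...)
   the named arguments consume two gp register slots (FMT, A) and one
   sse slot (D), so n_gpr == 2 and n_fpr == 1.  va_start therefore
   initializes
     gp_offset = 2 * 8       = 16   (slots 0..47 hold the six gp registers)
     fp_offset = 48 + 1 * 16 = 64   (sse slots start at 8*X86_64_REGPARM_MAX)
   so the first va_arg fetch starts at the third gp slot or the second
   sse slot of reg_save_area.  */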
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }
  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing a structure, verify that it is laid out
	 as a consecutive block on the register save area.  If not, we
	 need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
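
      /* Worked example (illustrative): with X86_64_REGPARM_MAX == 6 and
	 needed_intregs == 2, the threshold is (6 - 2 + 1) * 8 == 40, so
	 we branch to the overflow path whenever gp_offset >= 40, i.e.
	 whenever fewer than two of the six 8-byte gp slots remain.  */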
      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}
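
      /* Illustrative trace (not from the original source): for a 12-byte
	 struct passed in two gp registers, the second slot starts at
	 prev_size == 8 with cur_size == 8, so prev_size + cur_size (16)
	 exceeds size (12).  cur_size is trimmed to 4 and mode drops to
	 SImode; since 4 bytes match GET_MODE_SIZE (SImode), the tail is
	 copied with a plain assignment rather than the memcpy fallback.  */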
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align a parameter on the stack for the caller, if the
     parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
     be aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We must match the
     callee here with the caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }
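
  /* Worked example (illustrative): rounding the overflow pointer up to a
     power-of-two boundary uses (p + align - 1) & -align.  With a 256-bit
     (32-byte) boundary and ovf == 0x...44:
       (0x44 + 31) & -32  ==  0x63 & ~0x1f  ==  0x60.  */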
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
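
/* Illustrative note (not from the original source): these are the
   constants the x87 can materialize in a single instruction, and they
   feed the usual transcendental identities, e.g. computing a natural
   logarithm with fyl2x, which replaces ST(1)=y, ST(0)=x by y * log2(x):

	fldln2			; ST(0) = ln(2)
	fld	x		; ST(0) = x, ST(1) = ln(2)
	fyl2x			; ST(0) = ln(2) * log2(x) = ln(x)

   Loading such a value from memory instead would cost a 10-byte XFmode
   constant-pool entry, hence the special-casing when optimizing for
   size.  */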
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";

    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
	if (TARGET_SSE2)
	  return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
	if (TARGET_AVX2)
	  return 2;
      default:
	break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      if (TARGET_AVX)
	return "vpcmpeqd\t%0, %0, %0";
      else
	return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
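
/* Illustrative note (not from the original source): both idioms avoid a
   load from the constant pool.  A register xor'ed with itself is
   recognized by modern hardware as a zeroing idiom with no input
   dependency, and comparing a register for equality with itself yields
   all-ones lanes, e.g.:

	pxor	%xmm0, %xmm0	# xmm0 = 0
	pcmpeqd	%xmm1, %xmm1	# xmm1 = 0xFFFF...FF  */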
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
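
/* Illustrative sketch (not from the original source): for regno == BX_REG
   the thunk emitted above is essentially

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address, which is the caller's next IP,
   into %ebx, giving 32-bit PIC code a PC-relative anchor.  */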
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);
  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
	 16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;
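
  /* Worked example (illustrative, not from the original source): on
     64-bit, a function that needs a frame pointer, saves two gp
     registers, and has 40 bytes of 16-byte-aligned locals lays out as
       offset  8   return address (UNITS_PER_WORD)
       offset 16   saved %rbp         -> hard_frame_pointer_offset = 16
       offset 32   two register saves -> reg_save_offset = 32
       offset 32   already 16-aligned -> frame_pointer_offset = 32
       offset 72   plus 40 bytes of locals
     after which the outgoing-argument and preferred-boundary rounding
     below would bring stack_pointer_offset up to 80.  */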
  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, return
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
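
/* Worked examples (illustrative, not from the original source): the
   value is the number of extra bytes the addressing mode costs beyond
   the base opcode/modrm:
     (%rbx)      -> 0   (no displacement, no SIB)
     (%rbp)      -> 1   (needs a zero disp8)
     -64(%rax)   -> 1   (disp8)
     (%rsp)      -> 1   (SIB byte)
     -200(%rsp)  -> 5   (disp32 + SIB)
   choose_baseaddr below minimizes exactly this count.  */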
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. a register holding the static chain, if used.

   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a longer
   encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any caller-saved
	 register in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any caller-saved
	 register in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}

/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
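
/* Illustrative note (not from the original source): with the common
   default STACK_CHECK_PROBE_INTERVAL_EXP == 12, PROBE_INTERVAL is
   1 << 12 == 4096 bytes, i.e. one probe per 4 KiB page, which is what
   guarantees a guard page cannot be skipped over by a large
   allocation.  */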
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));


      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
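
/* Worked example (illustrative, not from the original source): on 64-bit
   with PROBE_INTERVAL == 4096 and dope == 32, a request of size == 10000
   takes the unrolled path (10000 <= 5 * 4096) and emits roughly

	sub	$8224, %rsp	# 2*4096 + 32, then probe
	sub	$4096, %rsp	# then probe
	sub	$1808, %rsp	# 10000 + 4096 - 12288, then probe
	add	$4128, %rsp	# give back PROBE_INTERVAL + dope

   for a net adjustment of exactly 10000 bytes with no page skipped.  */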
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (crtl->is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
        {
          rtx insn;
          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn)
                && requires_stack_frame_p (insn, prologue_used,
                                           set_up_by_prologue))
              {
                crtl->stack_realign_needed = stack_realign;
                crtl->stack_realign_finalized = true;
                return;
              }
        }

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
        crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
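/* Worked example (editorial note, not from the original source): if the
   incoming stack boundary resolves to 128 bits but a local object needs
   256-bit alignment (say, a spilled AVX vector), stack_realign above is
   computed from 128 < 256 and realignment stays enabled; conversely, if
   no insn in the body actually requires a frame, the block above clears
   frame_pointer_needed and downgrades all recorded alignments to the
   incoming boundary.  */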
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
         prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
        sorry ("ms_hook_prologue attribute isn%'t compatible "
               "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
         8b ff     movl.s %edi,%edi
         55        push   %ebp
         8b ec     movl.s %esp,%ebp

         This matches the hookable function prologue in Win32 API
         functions in Microsoft Windows XP Service Pack 2 and newer.
         Wine uses this to enable Windows apps to hook the Win32 API
         functions provided by Wine.

         What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
          && !(crtl->drap_reg && crtl->stack_realign_needed))
        {
          rtx push, mov;

          /* We've decided to use the frame pointer already set up.
             Describe this to the unwinder by pretending that both
             push and mov insns happen right here.

             Putting the unwind info here at the end of the ms_hook
             is done so that we can make absolutely certain we get
             the required byte sequence at the start of the function,
             rather than relying on an assembler that can produce
             the exact encoding required.

             However it does mean (in the unpatched case) that we have
             a 1 insn window where the asynchronous unwind info is
             incorrect.  However, if we placed the unwind info at
             its correct location we would have incorrect unwind info
             in the patched case.  Which is probably all moot since
             I don't expect Wine generates dwarf2 unwind info for the
             system libraries that use this feature.  */

          insn = emit_insn (gen_blockage ());

          push = gen_push (hard_frame_pointer_rtx);
          mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                             stack_pointer_rtx);
          RTX_FRAME_RELATED_P (push) = 1;
          RTX_FRAME_RELATED_P (mov) = 1;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

          /* Note that gen_push incremented m->fs.cfa_offset, even
             though we didn't emit the push insn here.  */
          m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.cfa_offset;
          m->fs.fp_valid = true;
        }
      else
        {
          /* The frame pointer is not needed so pop %ebp again.
             This leaves us with a pristine state.  */
          emit_insn (gen_pop (hard_frame_pointer_rtx));
        }
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
         only as a stack adjustment.  The real copy of the register as
         a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        {
          /* Push arg pointer reg.  */
          insn = emit_insn (gen_push (crtl->drap_reg));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
         address can be reached via (argp - 1) slot.  This is needed
         to implement macro RETURN_ADDR_RTX and intrinsic function
         expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
         we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
         on SEH target.  */
      if (!int_registers_saved
          && TARGET_SEH
          && !frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
        {
          insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
          RTX_FRAME_RELATED_P (insn) = 1;

          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.sp_offset;
          m->fs.fp_valid = true;
        }
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      /* When using red zone we may start register saving before allocating
         the stack frame saving one cycle of the prologue.  However, avoid
         doing this if we have to probe the stack; at least on x86_64 the
         stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
               && (! TARGET_STACK_PROBE
                   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
        {
          ix86_emit_save_regs_using_mov (frame.reg_save_offset);
          int_registers_saved = true;
        }
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),
                                   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
         pointer is no longer valid.  As for the value of sp_offset,
         see ix86_compute_frame_layout, which we need to match in order
         to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
        {
          if (ix86_static_chain_on_stack)
            stack_size += UNITS_PER_WORD;

          if (!call_used_regs[REGNO (crtl->drap_reg)])
            stack_size += UNITS_PER_WORD;

          /* This over-estimates by 1 minimal-stack-alignment-unit but
             mitigates that by counting in the new return address slot.  */
          current_function_dynamic_stack_size
            += crtl->stack_alignment_needed / BITS_PER_UNIT;
        }

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
        frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
         written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-sse_size), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
        {
          ix86_adjust_stack_and_probe (allocate);
          allocate = 0;
        }
      else
        {
          HOST_WIDE_INT size = allocate;

          if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
            size = 0x80000000 - STACK_CHECK_PROTECT - 1;

          if (TARGET_STACK_PROBE)
            ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
          else
            ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
        }
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
        eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;
        }
      if (r10_live)
        {
          r10 = gen_rtx_REG (Pmode, R10_REG);
          emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;
        }

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
         pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
        {
          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_offset += allocate;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    -allocate)));
        }
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                          gen_frame_mem (word_mode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
          emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
                          gen_frame_mem (word_mode, t));
        }
      else if (eax_live || r10_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode,
                                       (eax_live ? AX_REG : R10_REG)),
                          gen_frame_mem (word_mode, t));
        }
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (frame.stack_pointer_offset
                                     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        {
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx label, tmp_reg;

              gcc_assert (Pmode == DImode);
              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              tmp_reg = gen_rtx_REG (Pmode, R11_REG);
              gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
              insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                                   label));
              insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
              insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
                                               pic_offset_table_rtx, tmp_reg));
            }
          else
            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
        }
      else
        {
          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
        }
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is setup but after reload it turns out stack realign
         isn't necessary, here we will emit prologue to setup DRAP
         without stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
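/* Illustrative sketch (editorial note, not from the original source): for a
   simple 32-bit function with a frame pointer and a 24-byte frame, the
   insns emitted above amount to the classic sequence

	push	%ebp
	movl	%esp, %ebp
	subl	$24, %esp

   with the DRAP, SEH, probing and PIC paths adding their extra steps only
   when the corresponding conditions hold.  */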
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                             m->fs.fp_offset);
}
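/* Editorial note (not from the original source): the x86 "leave"
   instruction behaves like "mov %ebp, %esp" followed by "pop %ebp", which
   is why the code above can mark the stack pointer valid at
   fp_offset - UNITS_PER_WORD and invalidate the frame pointer in a single
   step.  */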
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (word_mode, regno);
        rtx insn, mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
                    || (crtl->sp_is_unchanging
                        && !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && !(m->fs.fp_valid || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
        ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx insn, sa = EH_RETURN_STACKADJ_RTX;

          /* Stack align doesn't work with eh_return.  */
          gcc_assert (!stack_realign_drap);
          /* Neither do regparm nested functions.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, t));

              t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
              insn = emit_move_insn (hard_frame_pointer_rtx, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (Pmode, sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style, false);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test
             are set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  Also do it on SEH target for very large
         frame as the emitted instructions aren't allowed by the ABI in
         epilogues.  */
      if (!m->fs.sp_valid
          || (TARGET_SEH
              && (m->fs.sp_offset - frame.reg_save_offset
                  >= SEH_MAX_FRAME_SIZE)))
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - frame.reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != frame.reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_function_for_size_p (cfun)
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave ();
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (VOIDmode, stack_pointer_rtx,
                         gen_rtx_PLUS (Pmode,
                                       crtl->drap_reg,
                                       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                                 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
         address, do explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode, ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
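/* Illustrative sketch (editorial note, not from the original source): for a
   frame-pointer function restored with "leave", this routine typically
   reduces to "leave; ret", while the pops_args >= 65536 path above instead
   ends with roughly "popl %ecx; addl $N, %esp; jmp *%ecx".  */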
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
           && NOTE_P (insn)
           && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
        /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
           notes only, instead set their CODE_LABEL_NUMBER to -1,
           otherwise there would be code generation differences
           in between -g and -g0.  */
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          deleted_debug_label = insn;
        insn = PREV_INSN (insn);
      }
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
        if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                          UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use ix86_gen_add3 in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
                                    stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
                GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
        {
          HOST_WIDE_INT argval;

          gcc_assert (Pmode == DImode);
          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            split_stack_fn_large =
              gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx x, label;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
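          /* Worked example (editorial note, not from the original
             source): the double shift is the usual idiom for avoiding
             an out-of-range shift when the type is only 32 bits wide;
             e.g. args_size = 16 and allocate = 4096 pack into
             argval = (16 << 32) + 4096 = 0x0000001000001000.  */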
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, frame_reg,
                                            GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine whether OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = XEXP (addr, 0);

          /* Adjust SUBREGs.  */
          if (GET_CODE (addr) == SUBREG
              && GET_MODE (SUBREG_REG (addr)) == SImode)
            addr = SUBREG_REG (addr);
          else if (GET_MODE (addr) == DImode)
            addr = gen_rtx_SUBREG (SImode, addr, 0);
          else if (GET_MODE (addr) != VOIDmode)
            return 0;
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case ZERO_EXTEND:
              op = XEXP (op, 0);
              if (GET_CODE (op) != UNSPEC)
                return 0;
              /* FALLTHRU */

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case SUBREG:
              if (!ix86_address_subreg_operand (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  if (index)
    {
      if (REG_P (index))
        ;
      else if (GET_CODE (index) == SUBREG
               && ix86_address_subreg_operand (SUBREG_REG (index)))
        ;
      else
        return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
          || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
          || base_reg == frame_pointer_rtx
          || base_reg == arg_pointer_rtx
          || (REG_P (base_reg)
              && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
                  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
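/* Worked example (editorial note, not from the original source): for the
   AT&T operand 12(%ebx,%esi,4), i.e. (plus (plus (reg %ebx) (mult
   (reg %esi) (const_int 4))) (const_int 12)), the routine above fills OUT
   with base = %ebx, index = %esi, scale = 4, disp = (const_int 12) and
   seg = SEG_DEFAULT, and returns 1.  */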
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in memory address, but I don't have AMD-K6 machine handy to
     check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
          && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)
          && !TARGET_64BIT)
        return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
        return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
              && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* Allowing PLUS expressions here would be unsafe; it would defeat
         the limit on the allowed distance of GOT references.  We should
         not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
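
/* Illustrative example (not from the original source): in 32-bit PIC code
   a displacement such as

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   is accepted here and eventually prints as foo@GOTOFF, addressed relative
   to the PIC base register.  */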
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
                                enum machine_mode mode ATTRIBUTE_UNUSED,
                                int opnum, int type,
                                int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
                       (reg:DI 97))
              (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating invalid memory address RTX.

     Following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
        {
          push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
                       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          something_reloaded = true;
        }

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
        {
          push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
                       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          something_reloaded = true;
        }

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                           rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  /* Since a constant address in x32 is sign-extended to 64 bits,
     we have to prevent addresses from 0x80000000 to 0xffffffff.  */
  if (TARGET_X32
      && CONST_INT_P (addr)
      && INTVAL (addr) < 0)
    return false;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
        reg = SUBREG_REG (base);
      else
        /* Base is not a register.  */
        return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
        reg = SUBREG_REG (index);
      else
        /* Index is not a register.  */
        return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
             used.  While the ABI also specifies 32bit relocations, we don't
             produce them at all and use IP relative instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          case UNSPEC_STACK_CHECK:
            gcc_assert (flag_split_stack);
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* displacement must be referenced via non_lazy_pointer */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
    }

  /* Everything looks valid.  */
  return true;
}
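
/* Illustrative example (not from the original source): the canonical
   address accepted above decomposes as base + index*scale + disp, e.g.

     (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
                       (reg:SI %ebx))
              (const_int 12))

   which corresponds to the AT&T operand 12(%ebx,%ecx,4).  */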
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
        {
          new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                         tmpreg, 1, OPTAB_DIRECT);
          new_rtx = reg;
        }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
        {
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels on VxWorks;
              see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
        {
          if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
            return legitimize_dllimport_symbol (addr, true);
          if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
              && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
            {
              rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
                                                   true);
              return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
            }
        }

      /* For x64 PE-COFF there is no GOT table.  So we use address
         directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                    UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use directly gen_movsi, otherwise the address is loaded
             into register for CSE.  We don't want to CSE this addresses,
             instead we CSE addresses from the GOT table, so skip this.  */
          emit_insn (gen_movsi (reg, new_rtx));
          new_rtx = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new_rtx = reg;
            }
          else
            new_rtx = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
                  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new_rtx);
                      new_rtx = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0),
                                              op1);
                    }
                }
            }
          else
            {
              rtx base = legitimize_pic_address (XEXP (addr, 0), reg);
              new_rtx = legitimize_pic_address (XEXP (addr, 1),
                                                base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
              else
                {
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
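
/* Illustrative examples (not from the original source) of the two
   32-bit reference types handled above, in AT&T syntax:

     movl  foo@GOT(%ebx), %eax     # global data: load address from the GOT
     leal  bar@GOTOFF(%ebx), %eax  # local data: PIC reg + unspec offset  */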
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
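
/* Illustrative example (not from the original source): the UNSPEC_TP
   built above typically assembles to a thread-pointer load such as

     movl %gs:0, %eax     # 32-bit
     movq %fs:0, %rax     # 64-bit  */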
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          tp = get_thread_pointer (Pmode, true);
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
                                                              caddr));
              insns = get_insns ();
              end_sequence ();

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          rtx tmp = ix86_tls_module_base ();

          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (Pmode, true);
          set_unique_reg_note (get_last_insn (), REG_EQUAL,
                               gen_rtx_MINUS (Pmode, tmp, tp));
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
                                                                  caddr));
              insns = get_insns ();
              end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          if (TARGET_SUN_TLS && !TARGET_X32)
            {
              /* The Sun linker took the AMD64 TLS spec literally
                 and can only handle %rax as destination of the
                 initial executable code sequence.  */

              dest = gen_reg_rtx (DImode);
              emit_insn (gen_tls_initial_exec_64_sun (dest, x));
              return dest;
            }

          /* Generate DImode references to avoid %fs:(%reg32)
             problems and linker IE->LE relaxation bug.  */
          tp_mode = DImode;
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
        off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (tp_mode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (tp_mode, off);
          return gen_rtx_PLUS (tp_mode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (Pmode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
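
/* Illustrative examples (not from the original source) of the access
   sequences the TLS models above expand to, in AT&T syntax:

   global-dynamic, 32-bit:
     leal  x@tlsgd(,%ebx,1), %eax
     call  ___tls_get_addr

   initial-exec, 64-bit:
     movq  x@gottpoff(%rip), %rax
     movq  %fs:(%rax), %rax

   local-exec, 64-bit:
     movq  %fs:x@tpoff, %rax  */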
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
                           VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
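
/* Illustrative example (not from the original source): for a declaration
   such as

     __declspec(dllimport) int foo;

   references to foo are rewritten to load through the __imp__foo (or
   __imp_foo) import pointer created above.  */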
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
                                      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
        return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
          && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
        {
          rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
          return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
        }
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode,
                                                 XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode,
                                                 XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (CONST_INT_P (XEXP (x, 1)))
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (Pmode, other,
                                               INTVAL (constant)));
            }
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && REG_P (XEXP (x, 1))
          && REG_P (XEXP (x, 0)))
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (REG_P (XEXP (x, 0)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 1) = temp;
          return x;
        }

      else if (REG_P (XEXP (x, 1)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will
             output the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
          && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           TARGET_PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (CONST_INT_P (XEXP (x, 1)));
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
        {
          bool f = i386_asm_output_addr_const_extra (file, x);
          gcc_assert (f);
          break;
        }

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLTOFF:
          fputs ("@PLTOFF", file);
          break;
        case UNSPEC_PCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "(%rip)" : "[rip]", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@gottpoff", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@tpoff", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@tpoff", file);
          else
            fputs ("@ntpoff", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@dtpoff", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                   "@gottpoff(%rip)": "@gottpoff[rip]", file);
          else
            fputs ("@gotntpoff", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
#if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
          machopic_output_function_base_name (file);
          break;
#endif
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
            && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
        idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_MODE (XEXP (x, 0)) == Pmode
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
        {
          rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
          x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
          if (MEM_P (orig_x))
            x = replace_equiv_address_nv (orig_x, x);
          return x;
        }
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
              && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
          || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
        return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
        {
          x = simplify_gen_subreg (GET_MODE (orig_x), x,
                                   GET_MODE (x), 0);
          if (x == NULL_RTX)
            return orig_x;
        }
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
        reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
        reg_addend = XEXP (reg_addend, 0);
      else
        {
          reg_addend = NULL_RTX;
          addend = XEXP (x, 0);
        }
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
          || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
                                                 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
         addend and subtract pic_offset_table_rtx.  This can happen e.g.
         for code like:
         leal (%ebx, %ecx, 4), %ecx
         ...
         movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
        result = gen_rtx_PLUS (Pmode,
                               gen_rtx_MINUS (Pmode, copy_rtx (addend),
                                              pic_offset_table_rtx),
                               result);
      else
        return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
        return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (CONST_INT_P (XEXP (term, 1))
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || (XINT (term, 1) != UNSPEC_GOTPCREL
              && XINT (term, 1) != UNSPEC_PCREL))
        return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
void
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
                    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      switch (mode)
        {
        case CCAmode:
          suffix = "a";
          break;
        case CCCmode:
          suffix = "c";
          break;
        case CCOmode:
          suffix = "o";
          break;
        case CCSmode:
          suffix = "s";
          break;
        default:
          suffix = "e";
        }
      break;
    case NE:
      switch (mode)
        {
        case CCAmode:
          suffix = "na";
          break;
        case CCCmode:
          suffix = "nc";
          break;
        case CCOmode:
          suffix = "no";
          break;
        case CCSmode:
          suffix = "ns";
          break;
        default:
          suffix = "ne";
        }
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
        suffix = "b";
      else
        gcc_unreachable ();
      break;
    case LT:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "s";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "l";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "ns";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "ge";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
        suffix = "be";
      else if (mode == CCCmode)
        suffix = fp ? "nb" : "ae";
      else
        gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
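
/* Illustrative examples (not from the original source): the suffixes
   chosen above complete instruction mnemonics, e.g. GTU in CCmode yields
   "a", producing "seta" or "cmova"; LTU yields "b" for "setb"/"cmovb".  */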
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
              || (REGNO (x) != ARG_POINTER_REGNUM
                  && REGNO (x) != FRAME_POINTER_REGNUM
                  && REGNO (x) != FLAGS_REG
                  && REGNO (x) != FPSR_REG
                  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1:
          putc ('b', file);
          break;
        case 2:
          putc ('w', file);
          break;
        case 4:
          putc ('d', file);
          break;
        case 8:
          /* no suffix */
          break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          reg = "st(0)";
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
        {
          gcc_assert (!duplicated);
          putc ('y', file);
          fputs (hi_reg_name[REGNO (x)] + 1, file);
          return;
        }
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
      else
        fprintf (file, ", %s", reg);
    }
}
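
/* Illustrative example (not from the original source): for operand 0 in
   register AX, the size overrides print as

     %b0 -> %al    %w0 -> %ax    %k0 -> %eax    %q0 -> %rax  */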
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   q --  likewise, print the DImode name of the register.
   x --  likewise, print the V4SFmode name of the register.
   t --  likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */

void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax. For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (!REG_P (x))
                {
                  putc ('[', file);
                  ix86_print_operand (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          ix86_print_operand (file, x, 0);
          return;

        case 'E':
          /* Wrap address in an UNSPEC to declare special handling.  */
          if (TARGET_64BIT)
            x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

          output_address (x);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT != ASM_ATT)
            return;

          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
              putc ('w', file);
              break;

            case 4:
              putc ('l', file);
              break;

            case 8:
              putc ('q', file);
              break;

            default:
              output_operand_lossage
                ("invalid operand size for operand code 'O'");
              return;
            }

          putc ('.', file);
#endif
          return;

        case 'z':
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              /* Opcodes don't get size suffixes if using Intel opcodes.  */
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 1:
                  putc ('b', file);
                  return;

                case 2:
                  putc ('w', file);
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
                  putc ('q', file);
                  return;

                default:
                  output_operand_lossage
                    ("invalid operand size for operand code 'z'");
                  return;
                }
            }

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            warning
              (0, "non-integer operand used with operand code 'z'");
          /* FALLTHRU */

        case 'Z':
          /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 2:
#ifdef HAVE_AS_IX86_FILDS
                  putc ('s', file);
#endif
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
#ifdef HAVE_AS_IX86_FILDQ
                  putc ('q', file);
#else
                  fputs ("ll", file);
#endif
                  return;

                default:
                  break;
                }
            }
          else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            {
              /* 387 opcodes don't get size suffixes
                 if the operands are registers.  */
              if (STACK_REG_P (x))
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 4:
                  putc ('s', file);
                  return;

                case 8:
                  putc ('l', file);
                  return;

                case 12:
                case 16:
                  putc ('t', file);
                  return;

                default:
                  break;
                }
            }
          else
            {
              output_operand_lossage
                ("invalid operand type used with operand code 'Z'");
              return;
            }

          output_operand_lossage
            ("invalid operand size for operand code 'Z'");
          return;

        case 'd':
        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 't':
        case 'y':
        case 'x':
        case 'X':
        case 'P':
        case 'p':
          break;

        case 's':
          if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              ix86_print_operand (file, x, 0);
              fputs (", ", file);
            }
          return;

        case 'Y':
          switch (GET_CODE (x))
            {
            case NE:
              fputs ("neq", file);
              break;
            case EQ:
              fputs ("eq", file);
              break;
            case GE:
            case GEU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
              break;
            case GT:
            case GTU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
              break;
            case LE:
            case LEU:
              fputs ("le", file);
              break;
            case LT:
            case LTU:
              fputs ("lt", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            case UNEQ:
              fputs ("ueq", file);
              break;
            case UNGE:
              fputs ("nlt", file);
              break;
            case UNGT:
              fputs ("nle", file);
              break;
            case UNLE:
              fputs ("ule", file);
              break;
            case UNLT:
              fputs ("ult", file);
              break;
            case LTGT:
              fputs ("une", file);
              break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'Y'");
              return;
            }
          return;

        case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             use completely different names for the comparisons than the
             fp conditional moves.  */
          switch (GET_CODE (x))
            {
            case UNEQ:
              if (TARGET_AVX)
                {
                  fputs ("eq_us", file);
                  break;
                }
            case EQ:
              fputs ("eq", file);
              break;
            case UNLT:
              if (TARGET_AVX)
                {
                  fputs ("nge", file);
                  break;
                }
            case LT:
              fputs ("lt", file);
              break;
            case UNLE:
              if (TARGET_AVX)
                {
                  fputs ("ngt", file);
                  break;
                }
            case LE:
              fputs ("le", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case NE:
              if (TARGET_AVX)
                {
                  fputs ("neq_oq", file);
                  break;
                }
            case LTGT:
              fputs ("neq", file);
              break;
            case UNGE:
              if (TARGET_AVX)
                {
                  fputs ("ge", file);
                  break;
                }
            case GE:
              fputs ("nlt", file);
              break;
            case UNGT:
              if (TARGET_AVX)
                {
                  fputs ("gt", file);
                  break;
                }
            case GT:
              fputs ("nle", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'D'");
              return;
            }
          return;

        case 'F':
        case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          /* FALLTHRU */

        case 'C':
        case 'c':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code '%c'", code);
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              code == 'c' || code == 'f',
                              code == 'F' || code == 'f',
                              file);
          return;

        case 'H':
          if (!offsettable_memref_p (x))
            {
              output_operand_lossage ("operand is not an offsettable memory "
                                      "reference, invalid operand code 'H'");
              return;
            }
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case 'K':
          gcc_assert (CONST_INT_P (x));

          if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
            fputs ("xacquire ", file);
#else
            fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
          else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
            fputs ("xrelease ", file);
#else
            fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
          /* We do not want to print value of the operand.  */
          return;

        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          {
            const char *name = get_some_local_dynamic_name ();
            if (name == NULL)
              output_operand_lossage ("'%%&' used without any "
                                      "local dynamic TLS references");
            else
              assemble_name (file, name);
            return;
          }

        case '+':
          {
            rtx x;

            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    bool taken = pred_val > REG_BR_PROB_BASE / 2;
                    bool cputaken
                      = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }

        case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
          putc (';', file);
#endif
          return;

        case '@':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('%', file);

          /* The kernel uses a different segment register for performance
             reasons; a system call would not have to trash the userspace
             segment register, which would be expensive.  */
          if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
            fputs ("fs", file);
          else
            fputs ("gs", file);
          return;

        case '~':
          putc (TARGET_AVX2 ? 'i' : 'f', file);
          return;

        case '^':
          if (TARGET_64BIT && Pmode != word_mode)
            fputs ("addr32 ", file);
          return;

        default:
            output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
          && GET_MODE (x) != BLKmode)
        {
          const char * size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "TBYTE"; break;
            case 16:
              if (GET_MODE (x) == XFmode)
                size = "TBYTE";
              else
                size = "XMMWORD";
              break;
            case 32: size = "YMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w', 'k',
             'q' and 'x')  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";
          else if (code == 'q')
            size = "QWORD";
          else if (code == 'x')
            size = "XMMWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && !CONST_INT_P (x))
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
        fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
        fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P' && code != 'p')
        {
          if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (CONST_INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
				    tmp, GET_MODE (tmp), 0);
    }

  if (parts.index && GET_CODE (parts.index) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
				     tmp, GET_MODE (tmp), 0);
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names for zero-extended
	 addresses to force addr32 prefix.  */
      if (TARGET_64BIT
	  && (GET_CODE (addr) == ZERO_EXTEND
	      || GET_CODE (addr) == AND))
	{
	  gcc_assert (!code);
	  code = 'l';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle them.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
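
/* Usage sketch (illustrative only, not part of the original source;
   the names "op", "lo" and "hi" are hypothetical):

     rtx op[1], lo[1], hi[1];
     op[0] = gen_reg_rtx (DImode);
     split_double_mode (DImode, op, 1, lo, hi);

   After the call, lo[0] is the SImode subreg at byte offset 0 and
   hi[0] the subreg at offset GET_MODE_SIZE (SImode), i.e. on this
   little-endian target the low half comes first in memory.  */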
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
    || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
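
/* Reading the templates above (illustrative note, not original text):
   the "{att|intel}" braces select the assembler dialect at output
   time.  For example, with buf already holding "fadd", operands[0] at
   st(0) and operands[2] at st(1), the template

     "p\t{%2, %0|%0, %2}"

   would come out roughly as "faddp %st(1), %st" in AT&T syntax and
   "faddp st, st(1)" in Intel syntax.  */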
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
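
/* For reference (illustrative note based on the x87 architecture, not
   original text): bits 10-11 of the control word form the rounding
   control field which the masks above manipulate:

     0x0000  round to nearest (even)
     0x0400  round down toward -infinity
     0x0800  round up toward +infinity
     0x0c00  round toward zero (truncate)

   and bit 5 (0x0020) masks the precision exception, as used for
   nearbyint().  */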
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);
	  else
	    op1 = tmp;
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
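
/* Illustrative emitted sequences (assumed, not taken from this file):
   with -mavx256-split-unaligned-load an unaligned V8SF load becomes
   something like

     vmovups      (%rax), %xmm0
     vinsertf128  $0x1, 16(%rax), %ymm0, %ymm0

   and with -mavx256-split-unaligned-store the store side becomes

     vmovups      %xmm0, (%rax)
     vextractf128 $0x1, %ymm0, 16(%rax)

   otherwise a single vmovups/vmovupd/vmovdqu is used.  */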
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX
      && GET_MODE_SIZE (mode) == 32)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  op0 = gen_lowpart (V32QImode, op0);
	  op1 = gen_lowpart (V32QImode, op1);
	  /* FALLTHRU */

	case MODE_VECTOR_FLOAT:
	  ix86_avx256_split_vector_move_misalign (op0, op1);
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_movdqu (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      /* We will eventually emit movups based on insn attributes.  */
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);

	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_movdqu (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    /* We will eventually emit movups based on insn attributes.  */
	    emit_insn (gen_sse2_movupd (op0, op1));
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
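
/* Illustrative expansion (assumed, not from this file): pushing a
   TFmode value, for which no register push instruction exists, becomes
   an explicit stack-pointer adjustment followed by an ordinary store
   into the new slot, roughly

     subq $16, %rsp
     (move of the 16-byte value to (%rsp))

   rather than a single push.  */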
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}


/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
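
/* Illustrative canonicalization (not original text): for a commutative
   PLUS:SI with

     dst = (reg a), src1 = (const_int 5), src2 = (reg b)

   ix86_swap_binary_operands_p returns true (immediates belong second),
   so the expander proceeds as if the operation were "a = b + 5",
   matching the "%0 = %1 op %2" shape the insn patterns expect.  */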
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
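
/* Illustrative shape of the emitted code (assumed, not from this
   file): for an unsigned SImode division the split produces roughly

       movl   %esi, %ecx
       orl    %edi, %ecx
       testl  $-256, %ecx        # both operands in [0-255]?
       je     .Lqimode
       (full 32-bit divl sequence)
       jmp    .Lend
     .Lqimode:
       (8-bit divb; remainder taken from %ah, quotient zero-extended
        from %al)
     .Lend:

   The 8-bit divide is much cheaper on some processors, which
   motivates the run-time dispatch.  */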
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
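
/* Worked example (illustrative note): distances are kept in
   half-cycles.  An independent instruction adds one half-cycle, while
   a dependent pair rounds up to the next full cycle and adds another:
   distance + (distance & 1) + 2.  So from distance 3, a dependency
   yields 3 + 1 + 2 = 6, whereas an independent instruction yields 4.  */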
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  The instruction sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
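
/* Usage sketch (illustrative; the operand names are hypothetical):
   emitting "dst += src" with the flags clobber that every x86 ALU
   operation implies:

     ix86_emit_binop (PLUS, SImode, dst, src);

   which builds (parallel [(set dst (plus:SI dst src))
                           (clobber (reg:CC FLAGS_REG))]).  */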
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
	parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
	parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, operands[0],
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	      tmp = parts.index;
	    }

	  ix86_emit_binop (PLUS, mode, operands[0], tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
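
/* Illustrative sketch, not part of the original sources: a scalar model
   of the sequence emitted above for the general case
   dest = base + index * scale + disp with a power-of-two scale.  The
   function name is hypothetical.  When the destination already holds
   the index register, the shift is replaced by SCALE repeated
   additions, matching the `adds' loop above.  */
#if 0
static unsigned int
lea_split_model (unsigned int base, unsigned int index,
		 unsigned int scale, unsigned int disp)
{
  unsigned int dest = index;		/* mov index, dest (if needed) */
  dest <<= __builtin_ctz (scale);	/* shift replaces scaling */
  dest += base;				/* add base (if present) */
  dest += disp;				/* add displacement (if nonzero) */
  return dest;
}
#endif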
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
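
/* Illustrative sketch, not from the original file: the scalar identity
   behind the splitter above.  Values below 2^31 go through the signed
   truncation unchanged; larger values are reduced by 2^31 first and the
   sign bit is xor-ed back in afterwards.  The function name is
   hypothetical.  */
#if 0
static unsigned int
uns_trunc_model (double x)	/* assumes 0.0 <= x < 4294967296.0 */
{
  if (x < 2147483648.0)				/* LE mask selects zero */
    return (unsigned int) (int) x;
  /* Mask selects 2^31: subtract, truncate, flip the sign bit back.  */
  return (unsigned int) (int) (x - 2147483648.0) ^ 0x80000000u;
}
#endif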
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
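
/* Illustrative sketch, not from the original file: why the exponent
   juxtaposition above works (assumes IEEE binary64 and C99 hex float
   literals; the function name is hypothetical).  Prepending the biased
   exponent word 0x43300000 to a 32-bit integer W produces the double
   2^52 + W exactly, and 0x45300000 produces 2^84 + W * 2^32.  */
#if 0
#include <stdint.h>
#include <string.h>
static double
uns64_to_double_model (uint64_t x)
{
  uint64_t lo_bits = ((uint64_t) 0x43300000 << 32) | (uint32_t) x;
  uint64_t hi_bits = ((uint64_t) 0x45300000 << 32) | (uint32_t) (x >> 32);
  double lo, hi;
  memcpy (&lo, &lo_bits, 8);	/* lo == 0x1.0p52 + (double) low32 */
  memcpy (&hi, &hi_bits, 8);	/* hi == 0x1.0p84 + 0x1.0p32 * high32 */
  return (lo - 0x1.0p52) + (hi - 0x1.0p84);	/* single final rounding */
}
#endif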
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
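
/* Illustrative sketch, not from the original file (hypothetical name):
   the wrapping PLUS above re-biases the unsigned input into signed
   range, and the bias is added back as a double afterwards.  */
#if 0
#include <stdint.h>
static double
uns32_to_double_model (uint32_t x)
{
  int32_t biased = (int32_t) (x + 0x80000000u);	/* PLUS (-2147483647 - 1) */
  return (double) biased + 2147483648.0;	/* add back 0x1.0p31 */
}
#endif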
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
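
/* Illustrative sketch, not from the original file (hypothetical name):
   the signed 64-bit conversion splits the value into 32-bit halves;
   the signed high half is scaled by 2^32 and the unsigned low half is
   added in.  The expander performs the low-half step through
   ix86_expand_convert_uns_sidf_sse.  */
#if 0
#include <stdint.h>
static double
sign64_to_double_model (int64_t x)
{
  double hi = (double) (int32_t) ((uint64_t) x >> 32) * 4294967296.0;
  double lo = (double) (uint32_t) x;	/* unsigned low half */
  return hi + lo;
}
#endif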
/* Convert an unsigned SImode value into an SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
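
/* Illustrative sketch, not from the original file (hypothetical name):
   both the scalar SImode expander above and this vector variant rely
   on the same split, since a 32-bit unsigned value may not survive a
   single signed int->float conversion.  Each 16-bit half converts
   exactly, the high product is exact, and only the final addition
   rounds.  */
#if 0
#include <stdint.h>
static float
uns32_to_float_model (uint32_t x)
{
  float lo = (float) (int) (x & 0xffff);	/* exact */
  float hi = (float) (int) (x >> 16);		/* exact */
  return hi * 65536.0f + lo;			/* one rounding */
}
#endif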
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
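
/* Illustrative sketch, not from the original file (hypothetical name):
   the scalar equivalent of the adjustment above, shown for one SF
   element.  VAL is reduced below 2^31 so a signed truncation can be
   used, and *XORP supplies the bit that restores the high range.  */
#if 0
#include <stdint.h>
static uint32_t
ufix_adjust_model (float v)	/* assumes 0.0f <= v < 4294967296.0f */
{
  uint32_t xor_mask = v >= 2147483648.0f ? 0x80000000u : 0u;
  float adjusted = v >= 2147483648.0f ? v - 2147483648.0f : v;
  return (uint32_t) (int32_t) adjusted ^ xor_mask;
}
#endif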
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
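
/* Illustrative sketch, not from the original file (hypothetical names,
   assumes IEEE binary32 on the host): the mask operations emitted
   above in scalar form.  NEG is XOR with the sign-bit mask, ABS is AND
   with the inverted mask.  */
#if 0
#include <stdint.h>
#include <string.h>
static float
fp_neg_model (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);
  u ^= 0x80000000u;		/* signbit mask, invert == false */
  memcpy (&f, &u, sizeof f);
  return f;
}

static float
fp_abs_model (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);
  u &= 0x7fffffffu;		/* signbit mask, invert == true */
  memcpy (&f, &u, sizeof f);
  return f;
}
#endif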
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
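
/* Illustrative sketch, not from the original file (hypothetical name):
   the two-mask decomposition used by the variable-sign split above,
   dest = (op0 & nmask) | (op1 & mask).  */
#if 0
#include <stdint.h>
#include <string.h>
static float
copysign_model (float x, float y)
{
  uint32_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux = (ux & 0x7fffffffu)	/* AND with nmask: magnitude of x */
       | (uy & 0x80000000u);	/* AND with mask, then IOR: sign of y */
  memcpy (&x, &ux, sizeof x);
  return x;
}
#endif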
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
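
/* Illustrative note, not from the original file: the magic constants
   above pick x87 status-word condition bits as they sit in %ah after
   fnstsw.  C0 is 0x01, C2 (unordered) is 0x04, C3 (equal) is 0x40,
   and 0x45 tests all three at once.  A model of the GT test, with
   hypothetical names:  */
#if 0
#define FPSW_C0 0x01	/* set when op0 < op1 */
#define FPSW_C2 0x04	/* set when unordered */
#define FPSW_C3 0x40	/* set when op0 == op1 */

static int
fp_gt_model (unsigned char ah)
{
  /* testb $0x45, %ah; sete: GT holds iff C0, C2 and C3 are all clear.  */
  return (ah & (FPSW_C0 | FPSW_C2 | FPSW_C3)) == 0;
}
#endif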
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
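
/* Illustrative sketch, not from the original file (hypothetical name):
   the two/three-jump cascade generated above, shown for an unsigned
   double-word a < b.  */
#if 0
#include <stdint.h>
static int
dw_ltu_model (uint32_t a_hi, uint32_t a_lo, uint32_t b_hi, uint32_t b_lo)
{
  if (a_hi < b_hi)		/* code1: branch to the true label */
    return 1;
  if (a_hi > b_hi)		/* code2: branch to label2 (false) */
    return 0;
  return a_lo < b_lo;		/* code3: unsigned low-word compare */
}
#endif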
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand comparison using arithmetic that is not
	 too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
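
/* Illustrative sketch, not from the original file (hypothetical names):
   the integer rewrites above, each ending in a carry-flag test.  */
#if 0
static int eq0_model (unsigned int a)
{ return a < 1u; }			/* a == 0  ->  LTU */

static int gtu_model (unsigned int a, unsigned int b)
{ return a >= b + 1u; }			/* a > b -> GEU; b + 1 must not wrap */

static int ge0_model (int a)
{ return (unsigned int) a < 0x80000000u; }	/* a >= 0  ->  LTU */
#endif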
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /*  Sign bit compares are better done using shifts than we do by using
	  sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}

      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);
	  HOST_WIDE_INT tmp;

	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}
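
/* Illustrative sketch, not from the original file (hypothetical name):
   the branchless sbb pattern produced above for constant arms, after
   the condition has been restricted to a carry-flag test.  */
#if 0
static unsigned int
movcc_sbb_model (unsigned int a, unsigned int b,
		 unsigned int ct, unsigned int cf)
{
  /* cmpl b, a; sbbl dest, dest  =>  dest = (a < b) ? -1 : 0  */
  unsigned int mask = a < b ? ~0u : 0u;
  /* andl $(cf - ct), dest; addl $ct, dest  */
  return ((cf - ct) & mask) + ct;	/* a < b ? cf : ct */
}
#endif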
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true, op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
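
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the AND/ANDN/OR fallback above.  CMP is a compare
   result, all-ones or all-zeros per element, so the blend reduces to
   pure bit arithmetic; the per-element vector behavior is shown on one
   scalar.  */

static inline unsigned int
example_bitwise_select (unsigned int cmp, unsigned int op_true,
                        unsigned int op_false)
{
  /* dest = (op_true & cmp) | (op_false & ~cmp): exactly the t2/t3/IOR
     sequence emitted when no blend instruction is available.  */
  return (op_true & cmp) | (op_false & ~cmp);
}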

/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
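
/* A minimal illustrative sketch (not part of GCC; the function names are
   hypothetical) of the LTGT/UNEQ decomposition above, written out in
   scalar C.  LTGT is "ordered and not equal", two compares combined with
   AND; UNEQ is the dual, "unordered or equal", combined with IOR.  */

static inline int
example_ltgt (double a, double b)
{
  return !__builtin_isunordered (a, b) && a != b;
}

static inline int
example_uneq (double a, double b)
{
  return __builtin_isunordered (a, b) || a == b;
}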

/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
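
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the sign-bias trick used above for GTU.  Flipping the
   sign bit of both operands maps unsigned order onto signed order, so an
   unsigned compare can use the hardware's signed PCMPGT.  Assumes the
   usual two's-complement representation.  */

static inline int
example_gtu_via_gt (unsigned int a, unsigned int b)
{
  int sa = (int) (a ^ 0x80000000u);
  int sb = (int) (b ^ 0x80000000u);
  return sa > sb;  /* same truth value as a > b treated as unsigned */
}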

/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can, after preparing suitable
             masks, use vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
               t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_const_mem (maskmode, vt);
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_lowpart (mode, target);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8si (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V8SFmode:
          mask = gen_lowpart (V8SFmode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SImode);
          mask = gen_lowpart (V4SImode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (128);
          for (i = 0; i < 32; i++)
            vec[i] = vt2;
          vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1, t3));
            }
          else
            {
              t4 = gen_reg_rtx (V32QImode);
              /* Similarly to the above one_operand_shuffle code,
                 just repeated twice for each operand.  The merge_two:
                 code will merge the two results together.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
                                              gen_lowpart (V4DImode, t4),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              emit_insn (gen_iorv32qi3 (t4, t2, t4));
              emit_insn (gen_iorv32qi3 (t3, t1, t3));
              t1 = t4;
              t2 = t3;
              goto merge_two;
            }
          return;

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    {
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
    }
  else if (one_operand_shuffle)
    {
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask.  At which point the masking that expand_int_vcond
             will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      for (i = 0; i < w; i++)
        vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
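
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the index widening performed above for V4DImode.
   Each 64-bit lane index A is rewritten as the 32-bit index pair
   { 2*A, 2*A+1 } so that the variable VPERMD can emulate a variable
   VPERMQ, which only exists with a constant control.  */

static void
example_widen_perm_indices (const unsigned char idx4[4], unsigned char idx8[8])
{
  int i;
  for (i = 0; i < 4; ++i)
    {
      idx8[2 * i] = (unsigned char) (2 * idx4[i]);
      idx8[2 * i + 1] = (unsigned char) (2 * idx4[i] + 1);
    }
}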

/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
        {
        case V32QImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv16qiv16hi2;
          else
            unpack = gen_avx2_sign_extendv16qiv16hi2;
          halfmode = V16QImode;
          extract
            = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
          break;
        case V16HImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv8hiv8si2;
          else
            unpack = gen_avx2_sign_extendv8hiv8si2;
          halfmode = V8HImode;
          extract
            = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
          break;
        case V8SImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv4siv4di2;
          else
            unpack = gen_avx2_sign_extendv4siv4di2;
          halfmode = V4SImode;
          extract
            = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
          break;
        case V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (GET_MODE_SIZE (imode) == 32)
        {
          tmp = gen_reg_rtx (halfmode);
          emit_insn (extract (tmp, src));
        }
      else if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (imode);
          emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
                                         gen_lowpart (V1TImode, src),
                                         GEN_INT (64)));
        }
      else
        tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      if (unsigned_p)
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   src, pc_rtx, pc_rtx);

      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
    }
}
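
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the pre-SSE4.1 widening path above.  For sign
   extension the partner vector is the (0 > src) compare mask -- 0xFF
   exactly for the negative elements -- and interleaving each element with
   its mask byte is sign extension; with a zero partner it is zero
   extension.  A little-endian element layout is assumed.  */

static inline short
example_interleave_sign_extend (signed char x)
{
  unsigned char mask = (0 > x) ? 0xFF : 0x00;
  return (short) (((unsigned short) mask << 8) | (unsigned char) x);
}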

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode:
          insn = gen_subqi3_carry;
          break;
        case HImode:
          insn = gen_subhi3_carry;
          break;
        case SImode:
          insn = gen_subsi3_carry;
          break;
        case DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode:
          insn = gen_addqi3_carry;
          break;
        case HImode:
          insn = gen_addhi3_carry;
          break;
        case SImode:
          insn = gen_addsi3_carry;
          break;
        case DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
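
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the shape of source the expander above targets.  The
   conditional increment x + (a < b) needs no setcc or cmov: an unsigned
   compare leaves its result in the carry flag, and an add-with-carry of
   zero folds it into the addition.  */

static inline unsigned int
example_int_addcc (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);  /* compare, then adc x, 0 */
}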

/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
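
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of what the CONST_DOUBLE arm above computes for DFmode:
   the value's 64-bit image split into two 32-bit words, low word first,
   matching what REAL_VALUE_TO_TARGET_DOUBLE delivers for a little-endian
   target.  */

static void
example_split_double (double d, unsigned int parts[2])
{
  unsigned long long bits;
  __builtin_memcpy (&bits, &d, sizeof (bits));
  parts[0] = (unsigned int) bits;            /* low 32 bits  */
  parts[1] = (unsigned int) (bits >> 32);    /* high 32 bits */
}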

/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], word_mode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts[1] || collisionparts[2]))
        {
          if (collisionparts[1])
            {
              tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
              tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
            }
          else
            {
              tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
              tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (ix86_gen_add3 (stack_pointer_rtx,
                                          stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these comes from attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy the parts in reverse order.  */
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
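
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the cost trade-off above in scalar form: x << count
   becomes COUNT self-additions when count * cost(add) does not exceed
   cost(shift-by-constant), otherwise a single shift is used.  */

static inline unsigned int
example_ashl_const (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;  /* each add doubles x, i.e. shifts it left by 1 */
  return x;
}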
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
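
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the double-word left shift the splitter above emits,
   written in portable C on explicit 32-bit halves.  The count >= 32 case
   moves the low half up and clears it; the small-count case is the SHLD
   pairing.  */

static void
example_shl64_via_32 (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));  /* like SHLD */
      *lo <<= count;
    }
}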
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
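
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical): the arithmetic counterpart of the previous sketch.  For
   counts >= 32 the low half receives the shifted high half while the
   high half is sign-filled with a shift by half_width - 1; the
   small-count case is the SHRD pairing.  Assumes arithmetic >> on
   signed int, as on the targets this file supports.  */

static void
example_sar64_via_32 (unsigned int *lo, int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *lo = (unsigned int) (*hi >> (count - 32));
      *hi >>= 31;  /* 0 or -1: the sign fill */
    }
  else if (count > 0)
    {
      *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count)); /* SHRD */
      *hi >>= count;
    }
}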
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}

/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}

/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
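
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical): SCALE is always a power of two here (a mode size), so
   the division above is emitted as a logical right shift by
   exact_log2 (scale).  */

static inline unsigned long
example_scale_counter (unsigned long count, int scale)
{
  return count >> __builtin_ctz ((unsigned int) scale);
}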

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
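
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the loop generated above, for MODE = a 4-byte chunk
   and UNROLL = 4.  The byte count is first rounded down to a whole
   number of unrolled chunks, and all loads of an iteration are issued
   before the stores, mirroring the expander's temporary-register
   scheme.  */

static void
example_unrolled_copy (unsigned char *dst, const unsigned char *src,
                       unsigned long count)
{
  unsigned long i, size = count & ~15UL;  /* 4 chunks of 4 bytes */
  for (i = 0; i < size; i += 16)
    {
      unsigned int t0, t1, t2, t3;
      __builtin_memcpy (&t0, src + i, 4);
      __builtin_memcpy (&t1, src + i + 4, 4);
      __builtin_memcpy (&t2, src + i + 8, 4);
      __builtin_memcpy (&t3, src + i + 12, 4);
      __builtin_memcpy (dst + i, &t0, 4);
      __builtin_memcpy (dst + i + 4, &t1, 4);
      __builtin_memcpy (dst + i + 8, &t2, 4);
      __builtin_memcpy (dst + i + 12, &t3, 4);
    }
}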

/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}

/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}

/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
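
/* A minimal illustrative sketch (not part of GCC; the function name is
   hypothetical) of the constant-count epilogue above in scalar form.
   The remaining count & (max_size - 1) bytes are moved by testing the
   count's bits from high to low, one fixed-size move per set bit, so no
   residual loop is needed.  */

static void
example_copy_tail (unsigned char *dst, const unsigned char *src,
                   unsigned int count)
{
  unsigned int off = 0;
  if (count & 8) { __builtin_memcpy (dst + off, src + off, 8); off += 8; }
  if (count & 4) { __builtin_memcpy (dst + off, src + off, 4); off += 4; }
  if (count & 2) { __builtin_memcpy (dst + off, src + off, 2); off += 2; }
  if (count & 1) dst[off] = src[off];
}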

/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}

/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Store enough bytes at DEST to align it, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough into DST to align it to DESIRED_ALIGN, where DST is known
   to be aligned by ALIGN.  ALIGN_BYTES is how many bytes need to be
   stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most that size, guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
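
/* Illustrative note (an added example, not from the original source):
   the per-CPU cost tables earlier in this file contain entries shaped
   like

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   With expected_size == 100 the size loop above finds the first entry
   whose max covers the size and returns rep_prefix_4_byte; larger
   blocks fall through to libcall unless an ix86_stringop_alg override
   or TARGET_INLINE_ALL_STRINGOPS applies.  */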

/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}

/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
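
/* Illustrative note (an added example, not from the original source):
   the result is strictly greater than VAL, e.g.
   smallest_pow2_greater_than (3) == 4 and
   smallest_pow2_greater_than (4) == 8.  Callers pass size_needed - 1,
   so a main loop working in 4-byte chunks yields an epilogue size
   of 4.  */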

/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps up to the epilogue for small
      blocks that can be handled by the epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the block
      is larger than the desired alignment.

      An optional dynamic check for size and a libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power-of-two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to size guarded by the prologue
      guard).  */
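
/* Illustrative sketch (an added example, not from the original source)
   of the code laid out by the four steps above for a variable count,
   a rep; movsl main body, ALIGN == 1 and DESIRED_ALIGN == 4:

	cmpl	$4, %ecx	; 1) prologue guard: small blocks
	jb	.Lepilogue	;    go straight to the epilogue
	...			; 2) byte moves until %edi is 4-aligned
	rep movsl		; 3) main body, 4-byte chunks
   .Lepilogue:
	...			; 4) copy the remaining count & 3 bytes  */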
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}

/* Helper function for memcpy.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   VAL * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
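/* Illustrative example (an added example, not from the original
   source): for MODE == SImode and VAL == 0xAB the expansion below
   computes

	reg  = 0x000000AB
	reg |= reg << 8;	-> 0x0000ABAB
	reg |= reg << 16;	-> 0xABABABAB

   which equals 0xAB * 0x01010101, the word a memset of byte 0xAB must
   store.  */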
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}

/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
   will be needed by the main loop copying SIZE_NEEDED chunks and the prologue
   getting alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use the byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to 4 bytes.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop: it only makes programs bigger and does not help
     performance.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
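  /* Illustrative example (an added example, not from the original
     source): with x = 0x61006261 (one zero byte),
     x - 0x01010101 = 0x5FFF6160 and ~x = 0x9EFF9D9E, so
     (x - 0x01010101) & ~x & 0x80808080 == 0x00800000: the borrow from
     the zero byte sets its bit 7, while the ~x term masks out bytes
     that already had bit 7 set.  */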
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}

/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
	return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}

/* For a given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}

rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
	XMM6_REG, XMM7_REG, XMM8_REG,
	XMM9_REG, XMM10_REG, XMM11_REG,
	XMM12_REG, XMM13_REG, XMM14_REG,
	XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
	fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned int i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
				       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[vec_len++]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
	{
	  if (cfun->machine->callee_return_avx256_p)
	    avx256 = callee_return_pass_avx256;
	  else
	    avx256 = callee_pass_avx256;
	}
      else if (cfun->machine->callee_return_avx256_p)
	avx256 = callee_return_avx256;
      else
	avx256 = call_no_avx256;

      if (reload_completed)
	emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
	vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
					 gen_rtvec (1, GEN_INT (avx256)),
					 UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}

void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}

/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission. */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}

/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix; does not include the one-byte modrm, opcode,
   or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */
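
  /* Illustrative examples (added, not from the original source) of the
     extra bytes counted below, beyond the modrm byte itself:
	(%eax)		no SIB, no displacement	-> 0
	(%esp)		needs a SIB byte	-> 1
	(%ebp)		encoded as 0(%ebp)	-> 1 (disp8)
	8(%eax)		disp8			-> 1
	1024(%eax)	disp32			-> 4
	(%eax,%ebx,2)	needs a SIB byte	-> +1  */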

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
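
/* Illustrative example (an added example, not from the original
   source): for `addl $5, %eax' with SHORTFORM set, IVAL == 5 lies in
   [-128, 127], so the sign-extended imm8 form applies and the function
   returns 1 rather than the 4 bytes of a full imm32.  */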

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   a 2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
     needs the 3-byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
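
/* Example (illustration): "vaddps %xmm1, %xmm2, %xmm0" is a 0f-opcode
   insn using only low registers, so the 2-byte VEX prefix suffices and
   the value is 2 + 1.  A memory operand addressed through %r8, however,
   needs REX.B, which only the 3-byte form can encode: 3 + 1.  */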
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

static bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
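
/* Example (illustration): in the sequence

       addl $8, %ebx		; SET_INSN writes %ebx
       movl (%ebx), %eax	; USE_INSN's address reads %ebx

   the load address depends on the preceding add, so ix86_agi_dependent
   returns true and the Pentium case in ix86_adjust_cost below charges
   an extra cycle for the interlock.  */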
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
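
/* Example (illustration, PentiumPro tuning): for

       movl %ecx, %edx		; dep_insn, TYPE_IMOV
       addl (%esi), %edx	; insn, a load whose address does not use %edx

   the load is not AGI-dependent on the move, so the reorder buffer can
   hide the latency and the code above collapses the dependence cost to
   a single cycle.  */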
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as many instructions can be executed on a cycle, i.e.,
	 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list, and
   (2) there is exactly one producer of an independent IMUL instruction
       in the ready list;
   in that case (3) the found producer is put on the top of the ready
   list.  Returns issue rate.  */

static int
ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
		    int clock_var ATTRIBUTE_UNUSED)
{
  static int issue_rate = -1;
  int n_ready = *pn_ready;
  rtx insn, insn1, insn2;
  int i;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;

  /* Set up issue rate.  */
  issue_rate = ix86_issue_rate ();

  /* Do reordering for Atom only.  */
  if (ix86_tune != PROCESSOR_ATOM)
    return issue_rate;
  /* Nothing to do if ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Check that IMUL instruction is on the top of ready list.  */
  insn = ready[n_ready - 1];
  if (!NONDEBUG_INSN_P (insn))
    return issue_rate;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return issue_rate;
  if (!(GET_CODE (SET_SRC (insn)) == MULT
	&& GET_MODE (SET_SRC (insn)) == SImode))
    return issue_rate;

  /* Search for producer of independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
	continue;
      /* Skip IMUL instruction.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
	insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
	  && GET_CODE (SET_SRC (insn2)) == MULT
	  && GET_MODE (SET_SRC (insn2)) == SImode)
	continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	{
	  rtx con;
	  con = DEP_CON (dep);
	  if (!NONDEBUG_INSN_P (con))
	    continue;
	  insn1 = PATTERN (con);
	  if (GET_CODE (insn1) == PARALLEL)
	    insn1 = XVECEXP (insn1, 0, 0);

	  if (GET_CODE (insn1) == SET
	      && GET_CODE (SET_SRC (insn1)) == MULT
	      && GET_MODE (SET_SRC (insn1)) == SImode)
	    {
	      sd_iterator_def sd_it1;
	      dep_t dep1;
	      /* Check if there is no other dependee for IMUL.  */
	      index = i;
	      FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
		{
		  rtx pro;
		  pro = DEP_PRO (dep1);
		  if (!NONDEBUG_INSN_P (pro))
		    continue;
		  if (pro != insn)
		    index = -1;
		}
	      if (index >= 0)
		break;
	    }
	}
      if (index >= 0)
	break;
    }
  if (index < 0)
    return issue_rate; /* Didn't find IMUL producer.  */

  if (sched_verbose > 1)
    fprintf (dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
	     INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));

  /* Put IMUL producer (ready[index]) at the top of ready list.  */
  insn1 = ready[index];
  for (i = index; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = insn1;

  return issue_rate;
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
/* Initialize DATA.  */

static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}

/* Advancing the cycle; reset ifetch block counts.  */

static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */

static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If the insn is too long for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}

/* Prepare for a new round of multipass lookahead scheduling.  */

static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */

static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns
		 <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on
     ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}

/* Revert the effect on ready_try.  */

static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}

/* Save the result of multipass lookahead scheduling for the next round.  */

static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}

/* Deallocate target data.  */

static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
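
/* Minimal sketch (editorial illustration, compiled out): the decoder test
   the filter above applies to one ready insn, with the Core 2/i7 numbers
   hard-coded (16-byte ifetch block, at most 6 insns decoded per cycle,
   8-byte limit on the secondary decoders).  */
#if 0
static bool
example_core2i7_can_issue_p (int block_len, int block_n_insns,
			     int insn_size, bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)
    return false;	/* Only decoder D0 handles long insns.  */
  if (block_len + insn_size > 16)
    return false;	/* Insn would cross the ifetch block boundary.  */
  if (block_n_insns + 1 > 6)
    return false;	/* All decoders are busy this cycle.  */
  return true;
}
#endif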
/* Prepare for scheduling pass.  */

static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
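
/* Example (illustration): a DFmode constant that would ordinarily get
   32-bit alignment is raised to 64 so FPU loads of it never straddle a
   word boundary, and a string literal of length 31 or more is aligned
   to BITS_PER_WORD to speed up word-sized block copies.  */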
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays of 16 bytes or larger to be aligned to a
     16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
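
/* Example (illustration): under -m64, "static char buf[32];" has a
   TYPE_SIZE of 256 bits, which is >= 128, so the ABI clause above raises
   its alignment to 128 bits and movaps/movdqa accesses to it are safe.  */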
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays of 16 bytes or larger to be aligned to a
     16-byte boundary.  Exact wording is:

       An array uses the same alignment as its elements, except that a local
       or global array variable of length at least 16 bytes or a C99
       variable-length array variable always has alignment of at least 16
       bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
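
/* Example (illustration): a plain 32-bit cdecl nested function receives
   its static chain in %ecx; a fastcall or thiscall one gets %eax because
   %ecx already carries an argument; with regparm(3) all of eax/ecx/edx
   are taken and the chain travels through the stack slot computed
   above.  */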
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
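
/* Example (illustration): a signature code along the lines of
   V4SF_FTYPE_V4SF_V4SF occupies a slice of ix86_builtin_func_args;
   walking the slice backwards threads the argument types onto a
   TREE_LIST ending in void_list_node, and build_function_type then
   yields the cached "v4sf (v4sf, v4sf)" type node.  */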
24799 /* Codes for all the SSE/MMX builtins. */
24802 IX86_BUILTIN_ADDPS
,
24803 IX86_BUILTIN_ADDSS
,
24804 IX86_BUILTIN_DIVPS
,
24805 IX86_BUILTIN_DIVSS
,
24806 IX86_BUILTIN_MULPS
,
24807 IX86_BUILTIN_MULSS
,
24808 IX86_BUILTIN_SUBPS
,
24809 IX86_BUILTIN_SUBSS
,
24811 IX86_BUILTIN_CMPEQPS
,
24812 IX86_BUILTIN_CMPLTPS
,
24813 IX86_BUILTIN_CMPLEPS
,
24814 IX86_BUILTIN_CMPGTPS
,
24815 IX86_BUILTIN_CMPGEPS
,
24816 IX86_BUILTIN_CMPNEQPS
,
24817 IX86_BUILTIN_CMPNLTPS
,
24818 IX86_BUILTIN_CMPNLEPS
,
24819 IX86_BUILTIN_CMPNGTPS
,
24820 IX86_BUILTIN_CMPNGEPS
,
24821 IX86_BUILTIN_CMPORDPS
,
24822 IX86_BUILTIN_CMPUNORDPS
,
24823 IX86_BUILTIN_CMPEQSS
,
24824 IX86_BUILTIN_CMPLTSS
,
24825 IX86_BUILTIN_CMPLESS
,
24826 IX86_BUILTIN_CMPNEQSS
,
24827 IX86_BUILTIN_CMPNLTSS
,
24828 IX86_BUILTIN_CMPNLESS
,
24829 IX86_BUILTIN_CMPNGTSS
,
24830 IX86_BUILTIN_CMPNGESS
,
24831 IX86_BUILTIN_CMPORDSS
,
24832 IX86_BUILTIN_CMPUNORDSS
,
24834 IX86_BUILTIN_COMIEQSS
,
24835 IX86_BUILTIN_COMILTSS
,
24836 IX86_BUILTIN_COMILESS
,
24837 IX86_BUILTIN_COMIGTSS
,
24838 IX86_BUILTIN_COMIGESS
,
24839 IX86_BUILTIN_COMINEQSS
,
24840 IX86_BUILTIN_UCOMIEQSS
,
24841 IX86_BUILTIN_UCOMILTSS
,
24842 IX86_BUILTIN_UCOMILESS
,
24843 IX86_BUILTIN_UCOMIGTSS
,
24844 IX86_BUILTIN_UCOMIGESS
,
24845 IX86_BUILTIN_UCOMINEQSS
,
24847 IX86_BUILTIN_CVTPI2PS
,
24848 IX86_BUILTIN_CVTPS2PI
,
24849 IX86_BUILTIN_CVTSI2SS
,
24850 IX86_BUILTIN_CVTSI642SS
,
24851 IX86_BUILTIN_CVTSS2SI
,
24852 IX86_BUILTIN_CVTSS2SI64
,
24853 IX86_BUILTIN_CVTTPS2PI
,
24854 IX86_BUILTIN_CVTTSS2SI
,
24855 IX86_BUILTIN_CVTTSS2SI64
,
24857 IX86_BUILTIN_MAXPS
,
24858 IX86_BUILTIN_MAXSS
,
24859 IX86_BUILTIN_MINPS
,
24860 IX86_BUILTIN_MINSS
,
24862 IX86_BUILTIN_LOADUPS
,
24863 IX86_BUILTIN_STOREUPS
,
24864 IX86_BUILTIN_MOVSS
,
24866 IX86_BUILTIN_MOVHLPS
,
24867 IX86_BUILTIN_MOVLHPS
,
24868 IX86_BUILTIN_LOADHPS
,
24869 IX86_BUILTIN_LOADLPS
,
24870 IX86_BUILTIN_STOREHPS
,
24871 IX86_BUILTIN_STORELPS
,
24873 IX86_BUILTIN_MASKMOVQ
,
24874 IX86_BUILTIN_MOVMSKPS
,
24875 IX86_BUILTIN_PMOVMSKB
,
24877 IX86_BUILTIN_MOVNTPS
,
24878 IX86_BUILTIN_MOVNTQ
,
24880 IX86_BUILTIN_LOADDQU
,
24881 IX86_BUILTIN_STOREDQU
,
24883 IX86_BUILTIN_PACKSSWB
,
24884 IX86_BUILTIN_PACKSSDW
,
24885 IX86_BUILTIN_PACKUSWB
,
24887 IX86_BUILTIN_PADDB
,
24888 IX86_BUILTIN_PADDW
,
24889 IX86_BUILTIN_PADDD
,
24890 IX86_BUILTIN_PADDQ
,
24891 IX86_BUILTIN_PADDSB
,
24892 IX86_BUILTIN_PADDSW
,
24893 IX86_BUILTIN_PADDUSB
,
24894 IX86_BUILTIN_PADDUSW
,
24895 IX86_BUILTIN_PSUBB
,
24896 IX86_BUILTIN_PSUBW
,
24897 IX86_BUILTIN_PSUBD
,
24898 IX86_BUILTIN_PSUBQ
,
24899 IX86_BUILTIN_PSUBSB
,
24900 IX86_BUILTIN_PSUBSW
,
24901 IX86_BUILTIN_PSUBUSB
,
24902 IX86_BUILTIN_PSUBUSW
,
24905 IX86_BUILTIN_PANDN
,
24909 IX86_BUILTIN_PAVGB
,
24910 IX86_BUILTIN_PAVGW
,
24912 IX86_BUILTIN_PCMPEQB
,
24913 IX86_BUILTIN_PCMPEQW
,
24914 IX86_BUILTIN_PCMPEQD
,
24915 IX86_BUILTIN_PCMPGTB
,
24916 IX86_BUILTIN_PCMPGTW
,
24917 IX86_BUILTIN_PCMPGTD
,
24919 IX86_BUILTIN_PMADDWD
,
24921 IX86_BUILTIN_PMAXSW
,
24922 IX86_BUILTIN_PMAXUB
,
24923 IX86_BUILTIN_PMINSW
,
24924 IX86_BUILTIN_PMINUB
,
24926 IX86_BUILTIN_PMULHUW
,
24927 IX86_BUILTIN_PMULHW
,
24928 IX86_BUILTIN_PMULLW
,
24930 IX86_BUILTIN_PSADBW
,
24931 IX86_BUILTIN_PSHUFW
,
24933 IX86_BUILTIN_PSLLW
,
24934 IX86_BUILTIN_PSLLD
,
24935 IX86_BUILTIN_PSLLQ
,
24936 IX86_BUILTIN_PSRAW
,
24937 IX86_BUILTIN_PSRAD
,
24938 IX86_BUILTIN_PSRLW
,
24939 IX86_BUILTIN_PSRLD
,
24940 IX86_BUILTIN_PSRLQ
,
24941 IX86_BUILTIN_PSLLWI
,
24942 IX86_BUILTIN_PSLLDI
,
24943 IX86_BUILTIN_PSLLQI
,
24944 IX86_BUILTIN_PSRAWI
,
24945 IX86_BUILTIN_PSRADI
,
24946 IX86_BUILTIN_PSRLWI
,
24947 IX86_BUILTIN_PSRLDI
,
24948 IX86_BUILTIN_PSRLQI
,
24950 IX86_BUILTIN_PUNPCKHBW
,
24951 IX86_BUILTIN_PUNPCKHWD
,
24952 IX86_BUILTIN_PUNPCKHDQ
,
24953 IX86_BUILTIN_PUNPCKLBW
,
24954 IX86_BUILTIN_PUNPCKLWD
,
24955 IX86_BUILTIN_PUNPCKLDQ
,
24957 IX86_BUILTIN_SHUFPS
,
24959 IX86_BUILTIN_RCPPS
,
24960 IX86_BUILTIN_RCPSS
,
24961 IX86_BUILTIN_RSQRTPS
,
24962 IX86_BUILTIN_RSQRTPS_NR
,
24963 IX86_BUILTIN_RSQRTSS
,
24964 IX86_BUILTIN_RSQRTF
,
24965 IX86_BUILTIN_SQRTPS
,
24966 IX86_BUILTIN_SQRTPS_NR
,
24967 IX86_BUILTIN_SQRTSS
,
24969 IX86_BUILTIN_UNPCKHPS
,
24970 IX86_BUILTIN_UNPCKLPS
,
24972 IX86_BUILTIN_ANDPS
,
24973 IX86_BUILTIN_ANDNPS
,
24975 IX86_BUILTIN_XORPS
,
24978 IX86_BUILTIN_LDMXCSR
,
24979 IX86_BUILTIN_STMXCSR
,
24980 IX86_BUILTIN_SFENCE
,
24982 /* 3DNow! Original */
24983 IX86_BUILTIN_FEMMS
,
24984 IX86_BUILTIN_PAVGUSB
,
24985 IX86_BUILTIN_PF2ID
,
24986 IX86_BUILTIN_PFACC
,
24987 IX86_BUILTIN_PFADD
,
24988 IX86_BUILTIN_PFCMPEQ
,
24989 IX86_BUILTIN_PFCMPGE
,
24990 IX86_BUILTIN_PFCMPGT
,
24991 IX86_BUILTIN_PFMAX
,
24992 IX86_BUILTIN_PFMIN
,
24993 IX86_BUILTIN_PFMUL
,
24994 IX86_BUILTIN_PFRCP
,
24995 IX86_BUILTIN_PFRCPIT1
,
24996 IX86_BUILTIN_PFRCPIT2
,
24997 IX86_BUILTIN_PFRSQIT1
,
24998 IX86_BUILTIN_PFRSQRT
,
24999 IX86_BUILTIN_PFSUB
,
25000 IX86_BUILTIN_PFSUBR
,
25001 IX86_BUILTIN_PI2FD
,
25002 IX86_BUILTIN_PMULHRW
,
25004 /* 3DNow! Athlon Extensions */
25005 IX86_BUILTIN_PF2IW
,
25006 IX86_BUILTIN_PFNACC
,
25007 IX86_BUILTIN_PFPNACC
,
25008 IX86_BUILTIN_PI2FW
,
25009 IX86_BUILTIN_PSWAPDSI
,
25010 IX86_BUILTIN_PSWAPDSF
,
25013 IX86_BUILTIN_ADDPD
,
25014 IX86_BUILTIN_ADDSD
,
25015 IX86_BUILTIN_DIVPD
,
25016 IX86_BUILTIN_DIVSD
,
25017 IX86_BUILTIN_MULPD
,
25018 IX86_BUILTIN_MULSD
,
25019 IX86_BUILTIN_SUBPD
,
25020 IX86_BUILTIN_SUBSD
,
25022 IX86_BUILTIN_CMPEQPD
,
25023 IX86_BUILTIN_CMPLTPD
,
25024 IX86_BUILTIN_CMPLEPD
,
25025 IX86_BUILTIN_CMPGTPD
,
25026 IX86_BUILTIN_CMPGEPD
,
25027 IX86_BUILTIN_CMPNEQPD
,
25028 IX86_BUILTIN_CMPNLTPD
,
25029 IX86_BUILTIN_CMPNLEPD
,
25030 IX86_BUILTIN_CMPNGTPD
,
25031 IX86_BUILTIN_CMPNGEPD
,
25032 IX86_BUILTIN_CMPORDPD
,
25033 IX86_BUILTIN_CMPUNORDPD
,
25034 IX86_BUILTIN_CMPEQSD
,
25035 IX86_BUILTIN_CMPLTSD
,
25036 IX86_BUILTIN_CMPLESD
,
25037 IX86_BUILTIN_CMPNEQSD
,
25038 IX86_BUILTIN_CMPNLTSD
,
25039 IX86_BUILTIN_CMPNLESD
,
25040 IX86_BUILTIN_CMPORDSD
,
25041 IX86_BUILTIN_CMPUNORDSD
,
25043 IX86_BUILTIN_COMIEQSD
,
25044 IX86_BUILTIN_COMILTSD
,
25045 IX86_BUILTIN_COMILESD
,
25046 IX86_BUILTIN_COMIGTSD
,
25047 IX86_BUILTIN_COMIGESD
,
25048 IX86_BUILTIN_COMINEQSD
,
25049 IX86_BUILTIN_UCOMIEQSD
,
25050 IX86_BUILTIN_UCOMILTSD
,
25051 IX86_BUILTIN_UCOMILESD
,
25052 IX86_BUILTIN_UCOMIGTSD
,
25053 IX86_BUILTIN_UCOMIGESD
,
25054 IX86_BUILTIN_UCOMINEQSD
,
25056 IX86_BUILTIN_MAXPD
,
25057 IX86_BUILTIN_MAXSD
,
25058 IX86_BUILTIN_MINPD
,
25059 IX86_BUILTIN_MINSD
,
25061 IX86_BUILTIN_ANDPD
,
25062 IX86_BUILTIN_ANDNPD
,
25064 IX86_BUILTIN_XORPD
,
25066 IX86_BUILTIN_SQRTPD
,
25067 IX86_BUILTIN_SQRTSD
,
25069 IX86_BUILTIN_UNPCKHPD
,
25070 IX86_BUILTIN_UNPCKLPD
,
25072 IX86_BUILTIN_SHUFPD
,
25074 IX86_BUILTIN_LOADUPD
,
25075 IX86_BUILTIN_STOREUPD
,
25076 IX86_BUILTIN_MOVSD
,
25078 IX86_BUILTIN_LOADHPD
,
25079 IX86_BUILTIN_LOADLPD
,
25081 IX86_BUILTIN_CVTDQ2PD
,
25082 IX86_BUILTIN_CVTDQ2PS
,
25084 IX86_BUILTIN_CVTPD2DQ
,
25085 IX86_BUILTIN_CVTPD2PI
,
25086 IX86_BUILTIN_CVTPD2PS
,
25087 IX86_BUILTIN_CVTTPD2DQ
,
25088 IX86_BUILTIN_CVTTPD2PI
,
25090 IX86_BUILTIN_CVTPI2PD
,
25091 IX86_BUILTIN_CVTSI2SD
,
25092 IX86_BUILTIN_CVTSI642SD
,
25094 IX86_BUILTIN_CVTSD2SI
,
25095 IX86_BUILTIN_CVTSD2SI64
,
25096 IX86_BUILTIN_CVTSD2SS
,
25097 IX86_BUILTIN_CVTSS2SD
,
25098 IX86_BUILTIN_CVTTSD2SI
,
25099 IX86_BUILTIN_CVTTSD2SI64
,
25101 IX86_BUILTIN_CVTPS2DQ
,
25102 IX86_BUILTIN_CVTPS2PD
,
25103 IX86_BUILTIN_CVTTPS2DQ
,
25105 IX86_BUILTIN_MOVNTI
,
25106 IX86_BUILTIN_MOVNTI64
,
25107 IX86_BUILTIN_MOVNTPD
,
25108 IX86_BUILTIN_MOVNTDQ
,
25110 IX86_BUILTIN_MOVQ128
,
25113 IX86_BUILTIN_MASKMOVDQU
,
25114 IX86_BUILTIN_MOVMSKPD
,
25115 IX86_BUILTIN_PMOVMSKB128
,
25117 IX86_BUILTIN_PACKSSWB128
,
25118 IX86_BUILTIN_PACKSSDW128
,
25119 IX86_BUILTIN_PACKUSWB128
,
25121 IX86_BUILTIN_PADDB128
,
25122 IX86_BUILTIN_PADDW128
,
25123 IX86_BUILTIN_PADDD128
,
25124 IX86_BUILTIN_PADDQ128
,
25125 IX86_BUILTIN_PADDSB128
,
25126 IX86_BUILTIN_PADDSW128
,
25127 IX86_BUILTIN_PADDUSB128
,
25128 IX86_BUILTIN_PADDUSW128
,
25129 IX86_BUILTIN_PSUBB128
,
25130 IX86_BUILTIN_PSUBW128
,
25131 IX86_BUILTIN_PSUBD128
,
25132 IX86_BUILTIN_PSUBQ128
,
25133 IX86_BUILTIN_PSUBSB128
,
25134 IX86_BUILTIN_PSUBSW128
,
25135 IX86_BUILTIN_PSUBUSB128
,
25136 IX86_BUILTIN_PSUBUSW128
,
25138 IX86_BUILTIN_PAND128
,
25139 IX86_BUILTIN_PANDN128
,
25140 IX86_BUILTIN_POR128
,
25141 IX86_BUILTIN_PXOR128
,
25143 IX86_BUILTIN_PAVGB128
,
25144 IX86_BUILTIN_PAVGW128
,
25146 IX86_BUILTIN_PCMPEQB128
,
25147 IX86_BUILTIN_PCMPEQW128
,
25148 IX86_BUILTIN_PCMPEQD128
,
25149 IX86_BUILTIN_PCMPGTB128
,
25150 IX86_BUILTIN_PCMPGTW128
,
25151 IX86_BUILTIN_PCMPGTD128
,
25153 IX86_BUILTIN_PMADDWD128
,
25155 IX86_BUILTIN_PMAXSW128
,
25156 IX86_BUILTIN_PMAXUB128
,
25157 IX86_BUILTIN_PMINSW128
,
25158 IX86_BUILTIN_PMINUB128
,
25160 IX86_BUILTIN_PMULUDQ
,
25161 IX86_BUILTIN_PMULUDQ128
,
25162 IX86_BUILTIN_PMULHUW128
,
25163 IX86_BUILTIN_PMULHW128
,
25164 IX86_BUILTIN_PMULLW128
,
25166 IX86_BUILTIN_PSADBW128
,
25167 IX86_BUILTIN_PSHUFHW
,
25168 IX86_BUILTIN_PSHUFLW
,
25169 IX86_BUILTIN_PSHUFD
,
25171 IX86_BUILTIN_PSLLDQI128
,
25172 IX86_BUILTIN_PSLLWI128
,
25173 IX86_BUILTIN_PSLLDI128
,
25174 IX86_BUILTIN_PSLLQI128
,
25175 IX86_BUILTIN_PSRAWI128
,
25176 IX86_BUILTIN_PSRADI128
,
25177 IX86_BUILTIN_PSRLDQI128
,
25178 IX86_BUILTIN_PSRLWI128
,
25179 IX86_BUILTIN_PSRLDI128
,
25180 IX86_BUILTIN_PSRLQI128
,
25182 IX86_BUILTIN_PSLLDQ128
,
25183 IX86_BUILTIN_PSLLW128
,
25184 IX86_BUILTIN_PSLLD128
,
25185 IX86_BUILTIN_PSLLQ128
,
25186 IX86_BUILTIN_PSRAW128
,
25187 IX86_BUILTIN_PSRAD128
,
25188 IX86_BUILTIN_PSRLW128
,
25189 IX86_BUILTIN_PSRLD128
,
25190 IX86_BUILTIN_PSRLQ128
,
25192 IX86_BUILTIN_PUNPCKHBW128
,
25193 IX86_BUILTIN_PUNPCKHWD128
,
25194 IX86_BUILTIN_PUNPCKHDQ128
,
25195 IX86_BUILTIN_PUNPCKHQDQ128
,
25196 IX86_BUILTIN_PUNPCKLBW128
,
25197 IX86_BUILTIN_PUNPCKLWD128
,
25198 IX86_BUILTIN_PUNPCKLDQ128
,
25199 IX86_BUILTIN_PUNPCKLQDQ128
,
25201 IX86_BUILTIN_CLFLUSH
,
25202 IX86_BUILTIN_MFENCE
,
25203 IX86_BUILTIN_LFENCE
,
25204 IX86_BUILTIN_PAUSE
,
25206 IX86_BUILTIN_BSRSI
,
25207 IX86_BUILTIN_BSRDI
,
25208 IX86_BUILTIN_RDPMC
,
25209 IX86_BUILTIN_RDTSC
,
25210 IX86_BUILTIN_RDTSCP
,
25211 IX86_BUILTIN_ROLQI
,
25212 IX86_BUILTIN_ROLHI
,
25213 IX86_BUILTIN_RORQI
,
25214 IX86_BUILTIN_RORHI
,
25217 IX86_BUILTIN_ADDSUBPS
,
25218 IX86_BUILTIN_HADDPS
,
25219 IX86_BUILTIN_HSUBPS
,
25220 IX86_BUILTIN_MOVSHDUP
,
25221 IX86_BUILTIN_MOVSLDUP
,
25222 IX86_BUILTIN_ADDSUBPD
,
25223 IX86_BUILTIN_HADDPD
,
25224 IX86_BUILTIN_HSUBPD
,
25225 IX86_BUILTIN_LDDQU
,
25227 IX86_BUILTIN_MONITOR
,
25228 IX86_BUILTIN_MWAIT
,
25231 IX86_BUILTIN_PHADDW
,
25232 IX86_BUILTIN_PHADDD
,
25233 IX86_BUILTIN_PHADDSW
,
25234 IX86_BUILTIN_PHSUBW
,
25235 IX86_BUILTIN_PHSUBD
,
25236 IX86_BUILTIN_PHSUBSW
,
25237 IX86_BUILTIN_PMADDUBSW
,
25238 IX86_BUILTIN_PMULHRSW
,
25239 IX86_BUILTIN_PSHUFB
,
25240 IX86_BUILTIN_PSIGNB
,
25241 IX86_BUILTIN_PSIGNW
,
25242 IX86_BUILTIN_PSIGND
,
25243 IX86_BUILTIN_PALIGNR
,
25244 IX86_BUILTIN_PABSB
,
25245 IX86_BUILTIN_PABSW
,
25246 IX86_BUILTIN_PABSD
,
25248 IX86_BUILTIN_PHADDW128
,
25249 IX86_BUILTIN_PHADDD128
,
25250 IX86_BUILTIN_PHADDSW128
,
25251 IX86_BUILTIN_PHSUBW128
,
25252 IX86_BUILTIN_PHSUBD128
,
25253 IX86_BUILTIN_PHSUBSW128
,
25254 IX86_BUILTIN_PMADDUBSW128
,
25255 IX86_BUILTIN_PMULHRSW128
,
25256 IX86_BUILTIN_PSHUFB128
,
25257 IX86_BUILTIN_PSIGNB128
,
25258 IX86_BUILTIN_PSIGNW128
,
25259 IX86_BUILTIN_PSIGND128
,
25260 IX86_BUILTIN_PALIGNR128
,
25261 IX86_BUILTIN_PABSB128
,
25262 IX86_BUILTIN_PABSW128
,
25263 IX86_BUILTIN_PABSD128
,
25265 /* AMDFAM10 - SSE4A New Instructions. */
25266 IX86_BUILTIN_MOVNTSD
,
25267 IX86_BUILTIN_MOVNTSS
,
25268 IX86_BUILTIN_EXTRQI
,
25269 IX86_BUILTIN_EXTRQ
,
25270 IX86_BUILTIN_INSERTQI
,
25271 IX86_BUILTIN_INSERTQ
,
25274 IX86_BUILTIN_BLENDPD
,
25275 IX86_BUILTIN_BLENDPS
,
25276 IX86_BUILTIN_BLENDVPD
,
25277 IX86_BUILTIN_BLENDVPS
,
25278 IX86_BUILTIN_PBLENDVB128
,
25279 IX86_BUILTIN_PBLENDW128
,
25284 IX86_BUILTIN_INSERTPS128
,
25286 IX86_BUILTIN_MOVNTDQA
,
25287 IX86_BUILTIN_MPSADBW128
,
25288 IX86_BUILTIN_PACKUSDW128
,
25289 IX86_BUILTIN_PCMPEQQ
,
25290 IX86_BUILTIN_PHMINPOSUW128
,
25292 IX86_BUILTIN_PMAXSB128
,
25293 IX86_BUILTIN_PMAXSD128
,
25294 IX86_BUILTIN_PMAXUD128
,
25295 IX86_BUILTIN_PMAXUW128
,
25297 IX86_BUILTIN_PMINSB128
,
25298 IX86_BUILTIN_PMINSD128
,
25299 IX86_BUILTIN_PMINUD128
,
25300 IX86_BUILTIN_PMINUW128
,
25302 IX86_BUILTIN_PMOVSXBW128
,
25303 IX86_BUILTIN_PMOVSXBD128
,
25304 IX86_BUILTIN_PMOVSXBQ128
,
25305 IX86_BUILTIN_PMOVSXWD128
,
25306 IX86_BUILTIN_PMOVSXWQ128
,
25307 IX86_BUILTIN_PMOVSXDQ128
,
25309 IX86_BUILTIN_PMOVZXBW128
,
25310 IX86_BUILTIN_PMOVZXBD128
,
25311 IX86_BUILTIN_PMOVZXBQ128
,
25312 IX86_BUILTIN_PMOVZXWD128
,
25313 IX86_BUILTIN_PMOVZXWQ128
,
25314 IX86_BUILTIN_PMOVZXDQ128
,
25316 IX86_BUILTIN_PMULDQ128
,
25317 IX86_BUILTIN_PMULLD128
,
25319 IX86_BUILTIN_ROUNDSD
,
25320 IX86_BUILTIN_ROUNDSS
,
25322 IX86_BUILTIN_ROUNDPD
,
25323 IX86_BUILTIN_ROUNDPS
,
25325 IX86_BUILTIN_FLOORPD
,
25326 IX86_BUILTIN_CEILPD
,
25327 IX86_BUILTIN_TRUNCPD
,
25328 IX86_BUILTIN_RINTPD
,
25329 IX86_BUILTIN_ROUNDPD_AZ
,
25331 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25332 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25333 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25335 IX86_BUILTIN_FLOORPS
,
25336 IX86_BUILTIN_CEILPS
,
25337 IX86_BUILTIN_TRUNCPS
,
25338 IX86_BUILTIN_RINTPS
,
25339 IX86_BUILTIN_ROUNDPS_AZ
,
25341 IX86_BUILTIN_FLOORPS_SFIX
,
25342 IX86_BUILTIN_CEILPS_SFIX
,
25343 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25345 IX86_BUILTIN_PTESTZ
,
25346 IX86_BUILTIN_PTESTC
,
25347 IX86_BUILTIN_PTESTNZC
,
25349 IX86_BUILTIN_VEC_INIT_V2SI
,
25350 IX86_BUILTIN_VEC_INIT_V4HI
,
25351 IX86_BUILTIN_VEC_INIT_V8QI
,
25352 IX86_BUILTIN_VEC_EXT_V2DF
,
25353 IX86_BUILTIN_VEC_EXT_V2DI
,
25354 IX86_BUILTIN_VEC_EXT_V4SF
,
25355 IX86_BUILTIN_VEC_EXT_V4SI
,
25356 IX86_BUILTIN_VEC_EXT_V8HI
,
25357 IX86_BUILTIN_VEC_EXT_V2SI
,
25358 IX86_BUILTIN_VEC_EXT_V4HI
,
25359 IX86_BUILTIN_VEC_EXT_V16QI
,
25360 IX86_BUILTIN_VEC_SET_V2DI
,
25361 IX86_BUILTIN_VEC_SET_V4SF
,
25362 IX86_BUILTIN_VEC_SET_V4SI
,
25363 IX86_BUILTIN_VEC_SET_V8HI
,
25364 IX86_BUILTIN_VEC_SET_V4HI
,
25365 IX86_BUILTIN_VEC_SET_V16QI
,
25367 IX86_BUILTIN_VEC_PACK_SFIX
,
25368 IX86_BUILTIN_VEC_PACK_SFIX256
,
25371 IX86_BUILTIN_CRC32QI
,
25372 IX86_BUILTIN_CRC32HI
,
25373 IX86_BUILTIN_CRC32SI
,
25374 IX86_BUILTIN_CRC32DI
,
25376 IX86_BUILTIN_PCMPESTRI128
,
25377 IX86_BUILTIN_PCMPESTRM128
,
25378 IX86_BUILTIN_PCMPESTRA128
,
25379 IX86_BUILTIN_PCMPESTRC128
,
25380 IX86_BUILTIN_PCMPESTRO128
,
25381 IX86_BUILTIN_PCMPESTRS128
,
25382 IX86_BUILTIN_PCMPESTRZ128
,
25383 IX86_BUILTIN_PCMPISTRI128
,
25384 IX86_BUILTIN_PCMPISTRM128
,
25385 IX86_BUILTIN_PCMPISTRA128
,
25386 IX86_BUILTIN_PCMPISTRC128
,
25387 IX86_BUILTIN_PCMPISTRO128
,
25388 IX86_BUILTIN_PCMPISTRS128
,
25389 IX86_BUILTIN_PCMPISTRZ128
,
25391 IX86_BUILTIN_PCMPGTQ
,
25393 /* AES instructions */
25394 IX86_BUILTIN_AESENC128
,
25395 IX86_BUILTIN_AESENCLAST128
,
25396 IX86_BUILTIN_AESDEC128
,
25397 IX86_BUILTIN_AESDECLAST128
,
25398 IX86_BUILTIN_AESIMC128
,
25399 IX86_BUILTIN_AESKEYGENASSIST128
,
25401 /* PCLMUL instruction */
25402 IX86_BUILTIN_PCLMULQDQ128
,
25405 IX86_BUILTIN_ADDPD256
,
25406 IX86_BUILTIN_ADDPS256
,
25407 IX86_BUILTIN_ADDSUBPD256
,
25408 IX86_BUILTIN_ADDSUBPS256
,
25409 IX86_BUILTIN_ANDPD256
,
25410 IX86_BUILTIN_ANDPS256
,
25411 IX86_BUILTIN_ANDNPD256
,
25412 IX86_BUILTIN_ANDNPS256
,
25413 IX86_BUILTIN_BLENDPD256
,
25414 IX86_BUILTIN_BLENDPS256
,
25415 IX86_BUILTIN_BLENDVPD256
,
25416 IX86_BUILTIN_BLENDVPS256
,
25417 IX86_BUILTIN_DIVPD256
,
25418 IX86_BUILTIN_DIVPS256
,
25419 IX86_BUILTIN_DPPS256
,
25420 IX86_BUILTIN_HADDPD256
,
25421 IX86_BUILTIN_HADDPS256
,
25422 IX86_BUILTIN_HSUBPD256
,
25423 IX86_BUILTIN_HSUBPS256
,
25424 IX86_BUILTIN_MAXPD256
,
25425 IX86_BUILTIN_MAXPS256
,
25426 IX86_BUILTIN_MINPD256
,
25427 IX86_BUILTIN_MINPS256
,
25428 IX86_BUILTIN_MULPD256
,
25429 IX86_BUILTIN_MULPS256
,
25430 IX86_BUILTIN_ORPD256
,
25431 IX86_BUILTIN_ORPS256
,
25432 IX86_BUILTIN_SHUFPD256
,
25433 IX86_BUILTIN_SHUFPS256
,
25434 IX86_BUILTIN_SUBPD256
,
25435 IX86_BUILTIN_SUBPS256
,
25436 IX86_BUILTIN_XORPD256
,
25437 IX86_BUILTIN_XORPS256
,
25438 IX86_BUILTIN_CMPSD
,
25439 IX86_BUILTIN_CMPSS
,
25440 IX86_BUILTIN_CMPPD
,
25441 IX86_BUILTIN_CMPPS
,
25442 IX86_BUILTIN_CMPPD256
,
25443 IX86_BUILTIN_CMPPS256
,
25444 IX86_BUILTIN_CVTDQ2PD256
,
25445 IX86_BUILTIN_CVTDQ2PS256
,
25446 IX86_BUILTIN_CVTPD2PS256
,
25447 IX86_BUILTIN_CVTPS2DQ256
,
25448 IX86_BUILTIN_CVTPS2PD256
,
25449 IX86_BUILTIN_CVTTPD2DQ256
,
25450 IX86_BUILTIN_CVTPD2DQ256
,
25451 IX86_BUILTIN_CVTTPS2DQ256
,
25452 IX86_BUILTIN_EXTRACTF128PD256
,
25453 IX86_BUILTIN_EXTRACTF128PS256
,
25454 IX86_BUILTIN_EXTRACTF128SI256
,
25455 IX86_BUILTIN_VZEROALL
,
25456 IX86_BUILTIN_VZEROUPPER
,
25457 IX86_BUILTIN_VPERMILVARPD
,
25458 IX86_BUILTIN_VPERMILVARPS
,
25459 IX86_BUILTIN_VPERMILVARPD256
,
25460 IX86_BUILTIN_VPERMILVARPS256
,
25461 IX86_BUILTIN_VPERMILPD
,
25462 IX86_BUILTIN_VPERMILPS
,
25463 IX86_BUILTIN_VPERMILPD256
,
25464 IX86_BUILTIN_VPERMILPS256
,
25465 IX86_BUILTIN_VPERMIL2PD
,
25466 IX86_BUILTIN_VPERMIL2PS
,
25467 IX86_BUILTIN_VPERMIL2PD256
,
25468 IX86_BUILTIN_VPERMIL2PS256
,
25469 IX86_BUILTIN_VPERM2F128PD256
,
25470 IX86_BUILTIN_VPERM2F128PS256
,
25471 IX86_BUILTIN_VPERM2F128SI256
,
25472 IX86_BUILTIN_VBROADCASTSS
,
25473 IX86_BUILTIN_VBROADCASTSD256
,
25474 IX86_BUILTIN_VBROADCASTSS256
,
25475 IX86_BUILTIN_VBROADCASTPD256
,
25476 IX86_BUILTIN_VBROADCASTPS256
,
25477 IX86_BUILTIN_VINSERTF128PD256
,
25478 IX86_BUILTIN_VINSERTF128PS256
,
25479 IX86_BUILTIN_VINSERTF128SI256
,
25480 IX86_BUILTIN_LOADUPD256
,
25481 IX86_BUILTIN_LOADUPS256
,
25482 IX86_BUILTIN_STOREUPD256
,
25483 IX86_BUILTIN_STOREUPS256
,
25484 IX86_BUILTIN_LDDQU256
,
25485 IX86_BUILTIN_MOVNTDQ256
,
25486 IX86_BUILTIN_MOVNTPD256
,
25487 IX86_BUILTIN_MOVNTPS256
,
25488 IX86_BUILTIN_LOADDQU256
,
25489 IX86_BUILTIN_STOREDQU256
,
25490 IX86_BUILTIN_MASKLOADPD
,
25491 IX86_BUILTIN_MASKLOADPS
,
25492 IX86_BUILTIN_MASKSTOREPD
,
25493 IX86_BUILTIN_MASKSTOREPS
,
25494 IX86_BUILTIN_MASKLOADPD256
,
25495 IX86_BUILTIN_MASKLOADPS256
,
25496 IX86_BUILTIN_MASKSTOREPD256
,
25497 IX86_BUILTIN_MASKSTOREPS256
,
25498 IX86_BUILTIN_MOVSHDUP256
,
25499 IX86_BUILTIN_MOVSLDUP256
,
25500 IX86_BUILTIN_MOVDDUP256
,
25502 IX86_BUILTIN_SQRTPD256
,
25503 IX86_BUILTIN_SQRTPS256
,
25504 IX86_BUILTIN_SQRTPS_NR256
,
25505 IX86_BUILTIN_RSQRTPS256
,
25506 IX86_BUILTIN_RSQRTPS_NR256
,
25508 IX86_BUILTIN_RCPPS256
,
25510 IX86_BUILTIN_ROUNDPD256
,
25511 IX86_BUILTIN_ROUNDPS256
,
25513 IX86_BUILTIN_FLOORPD256
,
25514 IX86_BUILTIN_CEILPD256
,
25515 IX86_BUILTIN_TRUNCPD256
,
25516 IX86_BUILTIN_RINTPD256
,
25517 IX86_BUILTIN_ROUNDPD_AZ256
,
25519 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
25520 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
25521 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
25523 IX86_BUILTIN_FLOORPS256
,
25524 IX86_BUILTIN_CEILPS256
,
25525 IX86_BUILTIN_TRUNCPS256
,
25526 IX86_BUILTIN_RINTPS256
,
25527 IX86_BUILTIN_ROUNDPS_AZ256
,
25529 IX86_BUILTIN_FLOORPS_SFIX256
,
25530 IX86_BUILTIN_CEILPS_SFIX256
,
25531 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
25533 IX86_BUILTIN_UNPCKHPD256
,
25534 IX86_BUILTIN_UNPCKLPD256
,
25535 IX86_BUILTIN_UNPCKHPS256
,
25536 IX86_BUILTIN_UNPCKLPS256
,
25538 IX86_BUILTIN_SI256_SI
,
25539 IX86_BUILTIN_PS256_PS
,
25540 IX86_BUILTIN_PD256_PD
,
25541 IX86_BUILTIN_SI_SI256
,
25542 IX86_BUILTIN_PS_PS256
,
25543 IX86_BUILTIN_PD_PD256
,
25545 IX86_BUILTIN_VTESTZPD
,
25546 IX86_BUILTIN_VTESTCPD
,
25547 IX86_BUILTIN_VTESTNZCPD
,
25548 IX86_BUILTIN_VTESTZPS
,
25549 IX86_BUILTIN_VTESTCPS
,
25550 IX86_BUILTIN_VTESTNZCPS
,
25551 IX86_BUILTIN_VTESTZPD256
,
25552 IX86_BUILTIN_VTESTCPD256
,
25553 IX86_BUILTIN_VTESTNZCPD256
,
25554 IX86_BUILTIN_VTESTZPS256
,
25555 IX86_BUILTIN_VTESTCPS256
,
25556 IX86_BUILTIN_VTESTNZCPS256
,
25557 IX86_BUILTIN_PTESTZ256
,
25558 IX86_BUILTIN_PTESTC256
,
25559 IX86_BUILTIN_PTESTNZC256
,
25561 IX86_BUILTIN_MOVMSKPD256
,
25562 IX86_BUILTIN_MOVMSKPS256
,
25565 IX86_BUILTIN_MPSADBW256
,
25566 IX86_BUILTIN_PABSB256
,
25567 IX86_BUILTIN_PABSW256
,
25568 IX86_BUILTIN_PABSD256
,
25569 IX86_BUILTIN_PACKSSDW256
,
25570 IX86_BUILTIN_PACKSSWB256
,
25571 IX86_BUILTIN_PACKUSDW256
,
25572 IX86_BUILTIN_PACKUSWB256
,
25573 IX86_BUILTIN_PADDB256
,
25574 IX86_BUILTIN_PADDW256
,
25575 IX86_BUILTIN_PADDD256
,
25576 IX86_BUILTIN_PADDQ256
,
25577 IX86_BUILTIN_PADDSB256
,
25578 IX86_BUILTIN_PADDSW256
,
25579 IX86_BUILTIN_PADDUSB256
,
25580 IX86_BUILTIN_PADDUSW256
,
25581 IX86_BUILTIN_PALIGNR256
,
25582 IX86_BUILTIN_AND256I
,
25583 IX86_BUILTIN_ANDNOT256I
,
25584 IX86_BUILTIN_PAVGB256
,
25585 IX86_BUILTIN_PAVGW256
,
25586 IX86_BUILTIN_PBLENDVB256
,
25587 IX86_BUILTIN_PBLENDVW256
,
25588 IX86_BUILTIN_PCMPEQB256
,
25589 IX86_BUILTIN_PCMPEQW256
,
25590 IX86_BUILTIN_PCMPEQD256
,
25591 IX86_BUILTIN_PCMPEQQ256
,
25592 IX86_BUILTIN_PCMPGTB256
,
25593 IX86_BUILTIN_PCMPGTW256
,
25594 IX86_BUILTIN_PCMPGTD256
,
25595 IX86_BUILTIN_PCMPGTQ256
,
25596 IX86_BUILTIN_PHADDW256
,
25597 IX86_BUILTIN_PHADDD256
,
25598 IX86_BUILTIN_PHADDSW256
,
25599 IX86_BUILTIN_PHSUBW256
,
25600 IX86_BUILTIN_PHSUBD256
,
25601 IX86_BUILTIN_PHSUBSW256
,
25602 IX86_BUILTIN_PMADDUBSW256
,
25603 IX86_BUILTIN_PMADDWD256
,
25604 IX86_BUILTIN_PMAXSB256
,
25605 IX86_BUILTIN_PMAXSW256
,
25606 IX86_BUILTIN_PMAXSD256
,
25607 IX86_BUILTIN_PMAXUB256
,
25608 IX86_BUILTIN_PMAXUW256
,
25609 IX86_BUILTIN_PMAXUD256
,
25610 IX86_BUILTIN_PMINSB256
,
25611 IX86_BUILTIN_PMINSW256
,
25612 IX86_BUILTIN_PMINSD256
,
25613 IX86_BUILTIN_PMINUB256
,
25614 IX86_BUILTIN_PMINUW256
,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM instructions.  */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin.  */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features.  */
  IX86_BUILTIN_CPU_INIT,
  IX86_BUILTIN_CPU_IS,
  IX86_BUILTIN_CPU_SUPPORTS,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
             enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
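
/* A minimal sketch of the flow implemented above.  The mask, name, type and
   code in this example are placeholders, not entries defined in this file:

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_foo",
                  V4DI_FTYPE_V4DI_V4DI, IX86_BUILTIN_FOO);

   If AVX2 is not enabled at this point and the front end's builtin_function
   hook differs from builtin_function_ext_scope, no decl is created; the name
   and type are parked in ix86_builtins_isa[IX86_BUILTIN_FOO] and are only
   turned into a decl later by ix86_add_new_builtins, for instance when a
   function selects the ISA with __attribute__ ((target ("avx2"))).  */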

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
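
/* Sketch only, with the same placeholder names as above: pure builtins are
   registered through this wrapper so the middle end may CSE or hoist calls:

     def_builtin_const (OPTION_MASK_ISA_AVX2, "__builtin_ia32_foo",
                        V4DI_FTYPE_V4DI_V4DI, IX86_BUILTIN_FOO);

   If the decl is deferred, const_p is remembered so that
   ix86_add_new_builtins can apply TREE_READONLY once the decl is built.  */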

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
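
/* Usage sketch (an assumed call pattern; the actual call sites live
   elsewhere in this file, in the target-option handling code):

     ix86_add_new_builtins (ix86_isa_flags);

   Only entries whose recorded .isa intersects ISA and that are still marked
   set_and_not_built_p get a decl; everything else stays deferred.  */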

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
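
/* How to read an entry in the bdesc_* tables that follow, using the first
   bdesc_comi entry as the example:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi,
       "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   mask gates registration on the ISA, icode names the insn pattern to
   expand to, name is the user-visible builtin (a 0 name is skipped by the
   registration loops and handled specially), code indexes ix86_builtins,
   comparison is the rtx comparison folded into the expansion, and flag
   holds either BUILTIN_DESC_* bits or, in most of the other tables, an
   (int)-cast ix86_builtin_func_type.  */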

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
26283 /* Builtins with variable number of arguments. */
26284 static const struct builtin_description bdesc_args
[] =
26286 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26287 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26288 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26289 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26290 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26291 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26292 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26295 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26296 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26297 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26298 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26299 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26300 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26302 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26303 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26304 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26305 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26306 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26307 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26308 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26309 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26311 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26312 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26314 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26315 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26316 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26317 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26319 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26320 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26321 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26322 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26323 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26324 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26326 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26327 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26328 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26329 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26330 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26331 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26333 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26334 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26335 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26337 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26339 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26340 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26341 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26342 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26343 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26344 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26346 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26347 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26348 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26349 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26350 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26351 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26353 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26354 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26355 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26356 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26359 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26360 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26361 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26362 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26364 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26365 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26366 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26367 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26368 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26369 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26370 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26371 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26372 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26373 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26374 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26375 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26376 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26377 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26378 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26381 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26382 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26383 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26384 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26385 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26386 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26389 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26390 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26391 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26392 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26393 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26394 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26395 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26396 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26397 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26398 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26399 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26400 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26402 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26404 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26405 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26406 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26407 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26408 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26409 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26410 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26411 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26413 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26414 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26415 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26416 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26417 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26418 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26419 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26420 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26421 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26422 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26423 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26424 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26425 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26426 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26427 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26428 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26429 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26430 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26431 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26432 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26433 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26434 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26436 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26437 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26438 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26439 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26441 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26442 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26443 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26444 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26446 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26448 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26449 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26450 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26451 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26452 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26454 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26455 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26456 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
26458 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26460 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26461 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26462 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26464 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26465 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26467 /* SSE MMX or 3Dnow!A */
26468 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26469 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26470 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26472 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26473 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26474 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26475 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26477 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26478 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26480 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
26483 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26486 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26489 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26491 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26492 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26495 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26497 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26501 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26502 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26504 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26509 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26510 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26514 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26515 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
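
  /* Editorial note (inferred from the rows above, not original text): the
     rtx_code column of the cmp* entries carries the FP comparison to emit,
     and the *_SWAP prototypes reverse the two operands first, so e.g.
     cmpgtpd is realized as the LT comparison with swapped operands.  */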
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
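
  /* Illustrative sketch (editorial, not part of the table): an entry such
     as the addv2df3 row above registers __builtin_ia32_addpd with the
     prototype selected by its V2DF_FTYPE_V2DF_V2DF flag, so user code,
     typically via <emmintrin.h>'s _mm_add_pd, can write:

       __v2df a = { 1.0, 2.0 }, b = { 3.0, 4.0 };
       __v2df c = __builtin_ia32_addpd (a, b);

     and the call expands directly to the named insn pattern.  */

  /* SSE3 */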
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
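
  /* SSSE3 */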
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
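
  /* SSE4.1 */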
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
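
  /* SSE4.1 rounding forms.  For the floor/ceil/trunc/rint rows below the
     rtx_code column is overloaded to carry a ROUND_* rounding-mode
     selector rather than a comparison.  */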
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
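
  /* Editorial note (inferred): for the ptest rows above, EQ, LTU and GTU
     select which flag condition the boolean result is read from after the
     ptest insn (zero, carry, and neither-zero-nor-carry respectively).  */

  /* SSE4.2 */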
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
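
  /* SSE4A */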
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
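
  /* AES */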
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
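
  /* PCLMUL */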
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
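
  /* AVX */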
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
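
  /* AVX2 */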
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
26999 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27000 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27001 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27002 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27003 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27004 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27005 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27006 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mulv4siv4di3
, "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27007 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27008 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27009 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27010 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27011 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27012 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulv4siv4di3
, "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27013 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27014 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27015 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27016 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27017 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27018 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27019 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27020 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27021 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27022 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27023 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27024 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27025 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27026 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27027 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27028 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27029 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27030 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27031 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27032 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27033 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27034 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27035 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27036 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27037 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27038 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27039 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27040 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27041 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27042 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27043 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27044 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27045 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27046 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27047 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27048 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27049 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27050 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27051 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27052 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27053 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27054 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27055 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27056 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27057 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27058 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27059 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27060 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27061 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27062 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27063 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27064 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27065 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27066 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27067 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27068 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27069 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27070 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27071 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27072 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27073 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27074 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27075 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27076 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27077 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27078 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27079 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27080 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27081 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27082 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27083 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27084 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27085 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27086 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27087 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27089 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27092 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27093 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27094 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27097 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27098 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27101 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27102 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27103 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27104 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27107 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27108 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27109 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27110 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27111 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27112 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
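/* Illustrative sketch, not part of the original source: each bdesc_args
   entry above only ties a builtin name to an insn pattern and a function
   type; user code normally reaches it through the intrinsic headers.
   Assuming -mbmi2, the hypothetical helper below would reach
   CODE_FOR_bmi2_pdep_si3 via __builtin_ia32_pdep_si, which bmi2intrin.h
   wraps as _pdep_u32.  Kept under #if 0 so it is never compiled here.  */
#if 0
#include <x86intrin.h>

static unsigned int
scatter_low_bits (unsigned int src, unsigned int mask)
{
  /* Deposit the low bits of SRC into the bit positions selected by MASK.  */
  return _pdep_u32 (src, mask);
}
#endif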
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
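/* Illustrative sketch, not part of the original source: the entries above
   share one builtin between the FMA and FMA4 ISAs.  Assuming -mfma4, the
   hypothetical helper below would reach CODE_FOR_fma4i_fmadd_v4sf through
   __builtin_ia32_vfmaddps (wrapped as _mm_macc_ps in fma4intrin.h), whose
   signature is MULTI_ARG_3_SF, i.e. V4SF_FTYPE_V4SF_V4SF_V4SF.  Kept under
   #if 0 so it is never compiled here.  */
#if 0
#include <x86intrin.h>

static __m128
fused_madd (__m128 a, __m128 b, __m128 c)
{
  return _mm_macc_ps (a, b, c);	/* a * b + c in a single instruction.  */
}
#endif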
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
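/* Illustrative sketch, not part of the original source: the vpcom entries
   above reuse one insn pattern per mode and differ only in the rtx_code
   stored in the comparison field.  Assuming -mxop, the hypothetical helper
   below calls the LT variant directly; each lane becomes all-ones when
   a < b (signed) and all-zeros otherwise.  Kept under #if 0 so it is never
   compiled here.  */
#if 0
#include <x86intrin.h>

static __m128i
lanes_less_than (__m128i a, __m128i b)
{
  return (__m128i) __builtin_ia32_vpcomltd ((__v4si) a, (__v4si) b);
}
#endif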
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
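/* Illustrative sketch, not part of the original source: the bdesc_tm
   entries above are what lets a vector access inside a transaction be
   instrumented.  Assuming -fgnu-tm and SSE, a hypothetical user function
   like the one below would have its store of G rewritten into a call to
   __builtin__ITM_WM128, located through ix86_builtin_tm_store below.
   Kept under #if 0 so it is never compiled here.  */
#if 0
#include <x86intrin.h>

static __m128 g;

static void
publish (__m128 v)
{
  __transaction_atomic { g = v; }
}
#endif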
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    if ((d->mask & ix86_isa_flags) != 0
        || (lang_hooks.builtin_function
            == lang_hooks.builtin_function_ext_scope))
      {
        tree type, attrs, attrs_type;
        enum built_in_function code = (enum built_in_function) d->code;

        ftype = (enum ix86_builtin_func_type) d->flag;
        type = ix86_get_builtin_func_type (ftype);

        if (BUILTIN_TM_LOAD_P (code))
          {
            attrs = attrs_load;
            attrs_type = attrs_type_load;
          }
        else if (BUILTIN_TM_STORE_P (code))
          {
            attrs = attrs_store;
            attrs_type = attrs_type_store;
          }
        else
          {
            attrs = attrs_log;
            attrs_type = attrs_type_log;
          }

        decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                     /* The builtin without the prefix for
                                        calling it directly.  */
                                     d->name + strlen ("__builtin_"),
                                     attrs);
        /* add_builtin_function () will set the DECL_ATTRIBUTES, now
           set the TYPE_ATTRIBUTES.  */
        decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

        set_builtin_decl (code, decl, false);
      }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);
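  /* Illustrative sketch, not part of the original source: the rdrand*_step
     builtins just defined return nonzero on success and store the random
     value through their pointer argument, so callers are expected to loop.
     Assuming -mrdrnd, a hypothetical retry loop looks like the #if 0 code
     below, which is never compiled here.  */
#if 0
static unsigned int
next_random (void)
{
  unsigned int r;
  while (!__builtin_ia32_rdrand32_step (&r))
    ;	/* The hardware may transiently fail; retry.  */
  return r;
}
#endif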
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
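
  /* Illustrative sketch (not part of this file): each gather builtin
     takes (src, base, index-vector, mask, scale) and loads the element
     at base + index[i] * scale for every lane whose mask has its most
     significant bit set, keeping the corresponding lane of SRC
     otherwise.  A rough scalar model of __builtin_ia32_gathersiv4sf:

       for (i = 0; i < 4; i++)
         result[i] = (mask_msb_set (mask[i])
                      ? *(const float *) ((const char *) base
                                          + index[i] * scale)
                      : src[i]);

     This is a behavioral model only (mask_msb_set is a made-up helper);
     SCALE is a 1/2/4/8 immediate and the authoritative semantics are
     those of the AVX2 VGATHER instructions.  */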
  /* RTM */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
  /* Add FMA4 multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
                              "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                          get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
        DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                      get_identifier (field_name[3]),
                      build_array_type (unsigned_type_node,
                                        build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
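
/* For reference, the libgcc-side definition this layout must match
   (from libgcc/config/i386/cpuinfo.c, reproduced here as a sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   Any change to the record built above has to be mirrored there.  */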
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
                         VAR_DECL,
                         get_identifier (name),
                         type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
                                DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c  */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",   F_CMOV},
      {"mmx",    F_MMX},
      {"popcnt", F_POPCNT},
      {"sse",    F_SSE},
      {"sse2",   F_SSE2},
      {"sse3",   F_SSE3},
      {"ssse3",  F_SSSE3},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx",    F_AVX},
      {"avx2",   F_AVX2}
    };

  static tree __processor_model_type = NULL_TREE;
  static tree __cpu_model_var = NULL_TREE;

  if (__processor_model_type == NULL_TREE)
    __processor_model_type = build_processor_model_struct ();

  if (__cpu_model_var == NULL_TREE)
    __cpu_model_var = make_var_decl (__processor_model_type,
                                     "__cpu_model");

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
         && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRs leading to a
         STRING_CST.  */
      if (!EXPR_P (param_string_cst))
        {
          error ("Parameter to builtin must be a string constant or literal");
          return integer_zero_node;
        }
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
        = sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
        if (strcmp (arch_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ARCH_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
          && field_val < M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN (field);
          field_val -= M_CPU_TYPE_START;
        }

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN (DECL_CHAIN (field));
          field_val -= M_CPU_SUBTYPE_START;
        }

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Check the value.  */
      return build2 (EQ_EXPR, unsigned_type_node, ref,
                     build_int_cstu (unsigned_type_node, field_val));
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree field;
      tree array_elt;
      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
        = sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
        if (strcmp (isa_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ISA_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
        field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
                          integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
                     build_int_cstu (unsigned_type_node, field_val));
    }
  gcc_unreachable ();
}
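
/* Illustrative sketch (not part of this file): for a call like
   __builtin_cpu_is ("corei7") the folding above produces a tree
   equivalent to

     __cpu_model.__cpu_type == (M_INTEL_COREI7 - M_CPU_TYPE_START)

   and __builtin_cpu_supports ("sse4.2") becomes

     __cpu_model.__cpu_features[0] & (1 << F_SSE4_2)

   Both reduce to a plain load from the libgcc-provided __cpu_model
   variable plus a comparison or mask; no target code is needed at the
   call site.  */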
static tree
ix86_fold_builtin (tree fndecl, int n_args,
                   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
                                   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
          || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
        {
          gcc_assert (n_args == 1);
          return fold_builtin_cpu (fndecl, args);
        }
    }

  return NULL_TREE;
}
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char *name, int code,
                       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                               NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}
/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
                         INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
                         INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
                         INT_FTYPE_PCCHAR, true);
}
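
/* Illustrative sketch (not part of this file): typical user-level
   dispatch with the three builtins created above:

     void (*pick_memcpy (void)) (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return memcpy_corei7;
       if (__builtin_cpu_supports ("avx2"))
         return memcpy_avx2;
       return memcpy_generic;
     }

   The helper names are made up for the example; only the three
   __builtin_cpu_* entry points come from this file.  */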
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
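
/* Illustrative sketch (not part of this file): these builtins let a
   function using one calling convention consume varargs in the other
   ABI, e.g. an ms_abi varargs function built on a sysv host:

     int __attribute__ ((ms_abi))
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int s = 0;
       __builtin_ms_va_start (ap, n);
       while (n--)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   The __builtin_va_arg usage is the generic varargs mechanism; only
   the ms/sysv start/end/copy entry points are created above.  */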
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
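
/* Illustrative sketch (not part of this file): user code sees the
   TFmode builtins above as __float128 helpers, e.g.

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);   /* |x| */

   When SSE is unavailable the calls expand to the libgcc routines
   named above (__fabstf2, __copysigntf3) instead of inline code.  */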
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                      goto non_constant;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
        non_constant:
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
                             args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
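
/* Illustrative note (assumption spelled out): SSE encodes only the
   EQ/LT/LE family of vector comparisons directly, so descriptors for
   GT/GE set the swap flag and the mirrored comparison is emitted
   instead.  In user terms, a > b is the same lanes as b < a:

     __m128 cmpgt (__m128 a, __m128 b)
     {
       return (__m128) __builtin_ia32_cmpltps ((__v4sf) b, (__v4sf) a);
     }

   The wrapper is for illustration only; the operand swap in the
   expander above is what implements this for the builtin tables.  */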
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
                                     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
28759 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
28763 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28764 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28765 rtx op0
= expand_normal (arg0
);
28766 rtx op1
= expand_normal (arg1
);
28767 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28768 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28769 enum rtx_code comparison
= d
->comparison
;
28771 if (VECTOR_MODE_P (mode0
))
28772 op0
= safe_vector_operand (op0
, mode0
);
28773 if (VECTOR_MODE_P (mode1
))
28774 op1
= safe_vector_operand (op1
, mode1
);
28776 target
= gen_reg_rtx (SImode
);
28777 emit_move_insn (target
, const0_rtx
);
28778 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28780 if ((optimize
&& !register_operand (op0
, mode0
))
28781 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28782 op0
= copy_to_mode_reg (mode0
, op0
);
28783 if ((optimize
&& !register_operand (op1
, mode1
))
28784 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28785 op1
= copy_to_mode_reg (mode1
, op1
);
28787 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28791 emit_insn (gen_rtx_SET (VOIDmode
,
28792 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28793 gen_rtx_fmt_ee (comparison
, QImode
,
28797 return SUBREG_REG (target
);
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or
             register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_avx2_inserti128:
              case CODE_FOR_avx2_extracti128:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:

              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:

              case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
              case CODE_FOR_sse4_1_roundps_sfix:
              case CODE_FOR_avx_roundpd_vec_pack_sfix256:
              case CODE_FOR_avx_roundps_sfix256:

              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vmcmpv2df3:
              case CODE_FOR_avx_vmcmpv4sf3:
              case CODE_FOR_avx_cmpv2df3:
              case CODE_FOR_avx_cmpv4sf3:
              case CODE_FOR_avx_cmpv4df3:
              case CODE_FOR_avx_cmpv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
        target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
        {
          if (GET_MODE (op) != Pmode)
            op = convert_to_mode (Pmode, op, 1);
          target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
        }
      else
        target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
          || target == 0
          || !register_operand (target, tmode)
          || GET_MODE (target) != tmode)
        target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
        {
          if (!match)
            {
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
              else
                error ("the last argument must be an 8-bit immediate");
              return const0_rtx;
            }
        }
      else
        {
          if (i == memory)
            {
              /* This must be the memory operand.  */
              if (GET_MODE (op) != Pmode)
                op = convert_to_mode (Pmode, op, 1);
              op = gen_rtx_MEM (mode, force_reg (Pmode, op));
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
            }
          else
            {
              /* This must be register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
              op = copy_to_mode_reg (mode, op);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
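/* Illustrative sketch (editor's addition): this is the expansion path
   behind the MMX init wrappers in mmintrin.h; for example

     __m64 v = _mm_setr_pi16 (1, 2, 3, 4);

   reaches here as IX86_BUILTIN_VEC_INIT_V4HI with four scalar
   arguments, one per vector element.  */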
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
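/* Design note (editor's addition): because OP0 is copied into a fresh
   register above, the builtin has value semantics.  A hypothetical use

     __v4hi b = __builtin_ia32_vec_set_v4hi (a, 99, 2);

   returns the modified copy in B while A itself is left untouched.  */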
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch ((enum ix86_builtins) fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
        /* Make it call __cpu_indicator_init in libgcc.  */
        tree call_expr, fndecl, type;
        type = build_function_type_list (integer_type_node, NULL_TREE);
        fndecl = build_fn_decl ("__cpu_indicator_init", type);
        call_expr = build_call_expr (fndecl, 0);
        return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
        gcc_assert (fold_expr != NULL_TREE);
        return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    default:
      break;
    }
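/* Illustrative example (editor's addition): thanks to the folding above,
   user code such as

     if (__builtin_cpu_supports ("avx2"))
       ...

   is expanded as a direct test of the CPU feature data that
   __cpu_indicator_init fills in at startup, rather than as a call.  */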
  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
                                       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
      return const0_rtx;
    }
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
        op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (!pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
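/* Illustrative example (editor's addition): these two cases implement the
   <xmmintrin.h> MXCSR helpers, e.g.

     unsigned int csr = _mm_getcsr ();
     _mm_setcsr (csr | 0x8040);    (assumed mask: the FTZ and DAZ bits)

   Both directions go through the SImode stack temporary allocated with
   assign_386_stack_local above.  */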
    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;
    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        {
          if (GET_MODE (op0) != Pmode)
            op0 = convert_to_mode (Pmode, op0, 1);
          op0 = force_reg (Pmode, op0);
        }
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;
    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
          || !insn_data[icode].operand[0].predicate (target, Pmode))
        target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
               ? CODE_FOR_tbm_bextri_si
               : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
        {
          error ("last argument must be an immediate");
          return const0_rtx;
        }
      else
        {
          unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
          unsigned char lsb_index = INTVAL (op1) & 0xFF;
          op1 = GEN_INT (length);
          op2 = GEN_INT (lsb_index);
          pat = GEN_FCN (icode) (target, op0, op1, op2);
          if (pat)
            emit_insn (pat);
          return target;
        }
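/* Worked example (editor's addition): the BEXTRI immediate packs the bit
   field as (length << 8) | lsb_index, so a hypothetical call

     __builtin_ia32_bextri_u32 (x, (8 << 8) | 4)

   extracts 8 bits starting at bit 4, matching the decoding of LENGTH and
   LSB_INDEX above.  */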
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
        {
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_zero_extendhisi2 (op2, op0));
        }
      else if (mode0 == SImode)
        op2 = op0;
      else
        op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
        target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
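/* Illustrative example (editor's addition): a typical use of the *_step
   builtins expanded above is

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       ... use r ...

   The IF_THEN_ELSE keyed on the carry flag (CCCmode) makes the builtin
   return 1 on success and 0 when the hardware had no entropy ready.  */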
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
          || GET_MODE (target) != insn_data[icode].operand[0].mode)
        subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
        subtarget = target;
      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
          || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
        {
          rtx half = gen_reg_rtx (V4SImode);
          if (!nonimmediate_operand (op2, V8SImode))
            op2 = copy_to_mode_reg (V8SImode, op2);
          emit_insn (gen_vec_extract_lo_v8si (half, op2));
          op2 = half;
        }
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
               || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
        {
          rtx (*gen) (rtx, rtx);
          rtx half = gen_reg_rtx (mode0);
          if (mode0 == V4SFmode)
            gen = gen_vec_extract_lo_v8sf;
          else
            gen = gen_vec_extract_lo_v8si;
          if (!nonimmediate_operand (op0, GET_MODE (op0)))
            op0 = copy_to_mode_reg (GET_MODE (op0), op0);
          emit_insn (gen (half, op0));
          op0 = half;
          if (!nonimmediate_operand (op3, GET_MODE (op3)))
            op3 = copy_to_mode_reg (GET_MODE (op3), op3);
          emit_insn (gen (half, op3));
          op3 = half;
        }
      /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
      if (GET_MODE (op1) != Pmode)
        op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }
      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (!pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;
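/* Illustrative example (editor's addition): this path serves the AVX2
   gather intrinsics, e.g.

     __m256d v = _mm256_i32gather_pd (base, idx, 8);

   where IDX holds 32-bit indices and the scale argument is what operand
   5's predicate checks above.  */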
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE)
            /* Emit a normal call if SSE isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;
:
30429 case BUILT_IN_LFLOOR
:
30430 case BUILT_IN_LLFLOOR
:
30431 /* The round insn does not trap on denormals. */
30432 if (flag_trapping_math
|| !TARGET_ROUND
)
30435 if (out_mode
== SImode
&& in_mode
== DFmode
)
30437 if (out_n
== 4 && in_n
== 2)
30438 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
30439 else if (out_n
== 8 && in_n
== 4)
30440 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
30444 case BUILT_IN_IFLOORF
:
30445 case BUILT_IN_LFLOORF
:
30446 case BUILT_IN_LLFLOORF
:
30447 /* The round insn does not trap on denormals. */
30448 if (flag_trapping_math
|| !TARGET_ROUND
)
30451 if (out_mode
== SImode
&& in_mode
== SFmode
)
30453 if (out_n
== 4 && in_n
== 4)
30454 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
30455 else if (out_n
== 8 && in_n
== 8)
30456 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;
    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;
    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;
    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;
    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;
    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;
    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;
    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;
    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
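/* Illustrative example (editor's addition): with -O3 and AVX enabled, a
   loop such as

     for (i = 0; i < n; i++)
       out[i] = sqrt (in[i]);

   makes the vectorizer query this hook for BUILT_IN_SQRT with V4DF types
   and receive the SQRTPD256 builtin selected above.  */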
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
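/* Worked example (editor's addition): for BUILT_IN_SINF with n == 4, BNAME
   is "__builtin_sinf", so bname+10 is "sinf"; "vmls%s" gives "vmlssinf",
   the last character is overwritten with '4' giving "vmlssin4", and the
   uppercase fixup turns it into "vmlsSin4", the SVML entry point.  */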
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (!TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
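/* Math note (editor's addition): the sqrt-to-rsqrt substitution is only
   valid under the unsafe-math flags tested above because rsqrt is an
   approximation.  The usual Newton-Raphson refinement is

     x1 = x0 * (1.5 - 0.5 * a * x0 * x0),  where x0 = rsqrt (a)

   after which sqrt (a) is recovered as a * x1.  */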
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we have full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
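/* Worked example (editor's addition): for V4SF the parallel [2 3 0 1]
   packs each selector as ipar[i] << (i * 2), giving the vpermilps
   immediate 0x4e, so this helper returns 0x4f (imm8 + 1).  */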
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
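/* Worked example (editor's addition): for V4DF the parallel [4 5 0 1]
   takes elements 4-5 (the low lane of the second operand) for the low
   half and elements 0-1 for the high half; each half contributes
   e / nelt2, so the mask is 0x02 and the helper returns 0x03
   (imm8 + 1).  */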
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */

void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
          || rclass == LEGACY_REGS
          || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
        regno = REGNO (x);
      else
        regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
        regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
        reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
        reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
        reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
        return true;

      default:
        break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                                enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
                         int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (Q_CLASS_P (regclass) || TARGET_64BIT)
          {
            if (!in)
              return ix86_cost->int_store[0];
            if (TARGET_PARTIAL_REG_DEPENDENCY
                && optimize_function_for_speed_p (cfun))
              cost = ix86_cost->movzbl_load;
            else
              cost = ix86_cost->int_load[0];
            if (in == 2)
              return MAX (cost, ix86_cost->int_store[0]);
            return cost;
          }
        else
          {
            if (in == 2)
              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
            if (in)
              return ix86_cost->movzbl_load;
            else
              return ix86_cost->int_store[0] + 4;
          }
        break;
      case 2:
        if (in == 2)
          return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
        return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
        /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
        if (mode == TFmode)
          mode = XFmode;
        if (in == 2)
          cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
        else if (in)
          cost = ix86_cost->int_load[2];
        else
          cost = ix86_cost->int_store[2];
        return (cost * (((int) GET_MODE_SIZE (mode)
                        + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
                       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
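/* Worked example of the size rules above: V4SFmode and V2DImode are
   both 16 bytes and both acceptable to SSE registers, so they are
   tieable, whereas SFmode (4 bytes) is never tied to V4SFmode.  */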
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE2 && mode == TFmode)
          || (TARGET_80387 && mode == XFmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE2 && mode == TCmode)
          || (TARGET_80387 && mode == XCmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
          || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
          || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
          || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
        units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
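/* Worked example: on a 32-bit target (UNITS_PER_WORD == 4) a DImode
   register-register set is moved in word-sized pieces, so the cost is
   COSTS_N_INSNS ((8 + 4 - 1) / 4) == COSTS_N_INSNS (2).  */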
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
                bool speed)
{
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
          && reg_or_0_operand (SET_SRC (x), VOIDmode))
        {
          *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
          return true;
        }
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        {
          *total = 0;
          return true;
        }
      switch (standard_80387_constant_p (x))
        {
        case 1: /* 0.0 */
          *total = 1;
          return true;
        default: /* Other constants */
        case 2: /* 1.0 */
          *total = 2;
          return true;
        case -1:
          break;
        }
      /* Start with (MEM (SYMBOL_REF)), since that's where
         it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
                + (flag_pic != 0 && !TARGET_64BIT)
                + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      break;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = cost->add;
      else
        *total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
          && GET_MODE_SIZE (mode) < UNITS_PER_WORD
          && CONST_INT_P (XEXP (x, 1)))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && cost->lea <= cost->shift_const)
            {
              *total = cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          /* ??? Should be SSE vector operation cost.  */
          /* At least for published AMD latencies, this really is the same
             as the latency for a simple fpu operation like fabs.  */
          /* V*QImode is emulated with 1-11 insns.  */
          if (mode == V16QImode || mode == V32QImode)
            {
              int count;
              if (TARGET_XOP && mode == V16QImode)
                {
                  /* For XOP we use vpshab, which requires a broadcast of the
                     value to the variable shift insn.  For constants this
                     means a V16Q const in mem; even when we can perform the
                     shift with one insn set the cost to prefer paddb.  */
                  if (CONSTANT_P (XEXP (x, 1)))
                    {
                      *total = (cost->fabs
                                + rtx_cost (XEXP (x, 0), code, 0, speed)
                                + (speed ? 2 : COSTS_N_BYTES (16)));
                      return true;
                    }
                  count = 3;
                }
              else
                count = TARGET_SSSE3 ? 7 : 11;
              *total = cost->fabs * count;
            }
          else
            *total = cost->fabs;
        }
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        {
          if (CONST_INT_P (XEXP (x, 1)))
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = cost->shift_var * 2;
              else
                *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (CONST_INT_P (XEXP (x, 1)))
            *total = cost->shift_const;
          else
            *total = cost->shift_var;
        }
      break;

    case FMA:
      {
        rtx sub;

        gcc_assert (FLOAT_MODE_P (mode));
        gcc_assert (TARGET_FMA || TARGET_FMA4);

        /* ??? SSE scalar/vector cost should be used here.  */
        /* ??? Bald assumption that fma has the same cost as fmul.  */
        *total = cost->fmul;
        *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
        sub = XEXP (x, 0);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 0, speed);

        sub = XEXP (x, 2);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 2, speed);
        return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE scalar cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fmul;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          /* V*QImode is emulated with 7-13 insns.  */
          if (mode == V16QImode || mode == V32QImode)
            {
              int extra = TARGET_XOP ? 5 : TARGET_SSSE3 ? 6 : 11;
              *total = cost->fmul * 2 + cost->fabs * extra;
            }
          /* Without sse4.1, we don't have PMULLD; it's emulated with 7
             insns, including two PMULUDQ.  */
          else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
            *total = cost->fmul * 2 + cost->fabs * 5;
          else
            *total = cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              /* Count the set bits of the constant multiplier.  */
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (cost->mult_init[MODE_INDEX (mode)]
                    + nbits * cost->mult_bit
                    + rtx_cost (op0, outer_code, opno, speed)
                    + rtx_cost (op1, outer_code, opno, speed));

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fdiv;
      else
        *total = cost->divide[MODE_INDEX (mode)];
      break;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fadd;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fchs;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          /* ??? Should be SSE vector operation cost.  */
          /* At least for published AMD latencies, this really is the same
             as the latency for a simple fpu operation like fabs.  */
          *total = cost->fabs;
        }
      else if (!TARGET_64BIT && mode == DImode)
        *total = cost->add * 2;
      else
        *total = cost->add;
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && CONST_INT_P (XEXP (XEXP (x, 0), 2))
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
                    + rtx_cost (const1_rtx, outer_code, opno, speed));
          return true;
        }
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = 0;
      break;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fabs;
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      break;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = cost->fabs;
      return true;

    default:
      break;
    }

  return false;
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    /* 25-byte PIC stub using "CALL get_pc_thunk".  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
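/* For orientation only, the MACHOPIC_PURE path above emits a stub of
   roughly this shape (the stub, binder and get_pc_thunk names come
   from GEN_BINDER_NAME_FOR_STUB, GEN_SYMBOL_NAME_FOR_SYMBOL and
   output_set_got, so the names below are illustrative):

	stub:	.indirect_symbol _foo
		call	___i686.get_pc_thunk.cx
	LPC$1:	movl	L1$lz-LPC$1(%ecx),%ecx
		jmp	*%ecx
	binder:	lea	L1$lz-binder(%ecx),%ecx
		pushl	%ecx
		jmp	dyld_stub_binding_helper
	L1$lz:	.indirect_symbol _foo
		.long	binder  */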
/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order[pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order[pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order[pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order[pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order[pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order[pos++] = i;

  /* MMX registers.  */
  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order[pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order[pos++] = 0;
}
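/* The resulting allocation order is thus: caller-saved GPRs,
   callee-saved GPRs, the x87 stack registers (early when they carry
   the FP math, late when SSE math is in use), the SSE and REX SSE
   registers, and the MMX registers last.  */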
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	error ("ms_abi and sysv_abi attributes are not compatible");

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	error ("ms_abi and sysv_abi attributes are not compatible");

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
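/* Example: under the 64-bit SysV ABI the this pointer arrives in the
   first integer parameter register (%rdi), or in the second (%rsi)
   when the return value is passed by invisible reference, since
   parm_regs[aggr] skips one slot for the hidden return pointer.  */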
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT
	  && !x86_64_general_operand (delta_rtx, Pmode))
	{
	  tmp = gen_rtx_REG (Pmode, R10_REG);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
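/* Putting it together, a 32-bit thunk with both DELTA and VCALL_OFFSET
   roughly: pulls *this into %eax, adds DELTA, loads the vtable pointer
   through it, adds the word at vtable + VCALL_OFFSET back into the
   this pointer, spills it back to its stack slot if that is where it
   lives, and finally tail-jumps to FUNCTION.  */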
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
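/* For 64-bit SysV PIC code this emits, for example:

	leaq	.LP0(%rip),%r11
	call	*mcount@GOTPCREL(%rip)

   where the leaq appears only ifndef NO_PROFILE_COUNTERS, the counter
   label depends on LPREFIX, and the callee name on MCOUNT_NAME.  */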
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  break;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster when RET is not the destination of a
   conditional jump or directly preceded by another jump instruction.
   We avoid the penalty by inserting NOP just before the RET
   instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* This only happens in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
       return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
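/* Example: (plus (reg) (const_int -4)) is negated so the output
   becomes `subl $4,%eax' rather than `addl $-4,%eax'; -128 is
   deliberately left alone because it fits in a sign-extended 8-bit
   immediate while +128 would not.  */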
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
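/* The negative path halves the input before the signed conversion and
   doubles the result afterwards; ORing the shifted-out low bit back in
   keeps the value's sticky rounding information.  E.g. for DImode
   in == 2^63 + 3, i0 == (in >> 1) | (in & 1) == 2^62 + 1, and
   f0 + f0 yields the correctly rounded unsigned result.  */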
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
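/* E.g. get_mode_wider_vector (V16QImode) yields V8HImode: the same
   16-byte vector, half as many elements, each twice as wide.  */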
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
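/* Example of the QImode trick above: to set byte 5 of a V16QImode
   vector without SSE4.1, byte 4's constant is combined with the
   variable byte (shifted into the odd half) into one HImode value,
   which is then inserted as element 2 (5 >> 1) of the V8HImode view.  */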
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops [i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops [n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */
void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
        { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
        { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
        { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
        { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
        { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
        { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
        { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
        { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
        { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
        { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
        { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
        { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
        break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
        tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
        tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const1_rtx, const0_rtx,
                                          GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
                                          GEN_INT (elt), GEN_INT (elt),
                                          GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SFmode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DFmode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V32QImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V16QImode);
          if (elt < 16)
            emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 15);
          return;
        }
      break;

    case V16HImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V8HImode);
          if (elt < 8)
            emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 7);
          return;
        }
      break;

    case V8SImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SImode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DImode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
        tem = gen_sse_movhlps (dest, src, src);
      else
        tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
                                   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
                                gen_lowpart (V1TImode, src),
                                GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufps256 (dest, src, src,
                                 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
        tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
                                 gen_lowpart (V4DImode, src),
                                 gen_lowpart (V4DImode, src),
                                 const1_rtx);
      else
        tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
                                  gen_lowpart (V2TImode, src),
                                  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
        dst = dest;
      else
        dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
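
/* For illustration only: a scalar C model of the halving loop above,
   specialized to a 4-element max reduction.  This sketch is not GCC
   code; the function name is invented and plain C arrays stand in for
   vector registers.  Each emit_reduc_half step moves the upper half of
   the live elements down, and fn combines halves until the result sits
   in element 0.  */
#if 0
static int
reduce_max4_model (const int v[4])
{
  int half1[2] = { v[2], v[3] };        /* emit_reduc_half, i == 128 */
  int t[2];
  t[0] = half1[0] > v[0] ? half1[0] : v[0];   /* fn (dst, half, vec) */
  t[1] = half1[1] > v[1] ? half1[1] : v[1];
  int half2 = t[1];                     /* emit_reduc_half, i == 64 */
  return half2 > t[0] ? half2 : t[0];   /* final fn; result in elt 0 */
}
#endif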
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    gcc_unreachable();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
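
/* For illustration only: a scalar C model of the branch above (not GCC
   code; the function name is invented).  Both arms compute
   ln(1+x) = ln2 * log2(1+x); the first uses the fyl2xp1 form, which
   avoids cancellation in 1+x but is only valid for
   |x| < 1 - sqrt(2)/2 ~ 0.29289, hence the threshold constant.  */
#if 0
#include <math.h>
static double
log1p_model (double x)
{
  if (fabs (x) < 0.29289321881345247561)  /* 1 - sqrt(2)/2 */
    return M_LN2 * log2 (1.0 + x);  /* fyl2xp1 path: y * log2(x + 1) */
  double t = 1.0 + x;               /* explicit add, as gen_addxf3 above */
  return M_LN2 * log2 (t);          /* fyl2x path: y * log2(x) */
}
#endif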
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
                          gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
                                          UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
                              gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
        rtx tmp0 = gen_reg_rtx (XFmode);

        emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

        emit_insn (gen_rtx_SET (VOIDmode, res,
                                gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
                                                UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
                              gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                              gen_rtx_LABEL_REF (VOIDmode, jump_label),
                              pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
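
/* For illustration only: the identity above as scalar C (not GCC code;
   the function name is invented).  Note that floor(|a| + 0.5) rounds
   halfway cases away from zero and, because the |a| + 0.5 addition can
   itself round up, values just below 0.5 may land on 1; so this matches
   the C library round() only up to such edge cases.  */
#if 0
#include <math.h>
static double
round_model (double a)
{
  double r = floor (fabs (a) + 0.5);  /* frndint with round-down control */
  return signbit (a) ? -r : r;        /* sign taken from fxam's C1 bit */
}
#endif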
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                          UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, a, x1)));
}
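
/* For illustration only: a scalar C model of the sequence above (not
   GCC code; the name is invented and 1.0f/b merely stands in for the
   ~12-bit rcpss hardware estimate).  One Newton-Raphson step
   x1 = x0 * (2 - b*x0) roughly doubles the number of accurate bits of
   the reciprocal estimate x0.  */
#if 0
static float
swdiv_model (float a, float b)
{
  float x0 = 1.0f / b;   /* stands in for the rcpss estimate of rcp(b) */
  float e0 = x0 * b;     /* e0 = b * rcp(b) */
  e0 = x0 * e0;          /* e0 = b * rcp(b) * rcp(b) */
  float e1 = x0 + x0;    /* e1 = rcp(b) + rcp(b) */
  float x1 = e1 - e0;    /* x1 = 2*rcp(b) - b*rcp(b)^2 */
  return a * x1;         /* a / b ~= a * x1 */
}
#endif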
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
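
/* For illustration only: the refinement above as scalar C (not GCC
   code; the name is invented and 1.0f/sqrtf(a) stands in for the
   rsqrtss estimate).  One step of x' = -0.5 * x * (a*x*x - 3) refines
   an rsqrt estimate x; multiplying by a first instead yields sqrt.  */
#if 0
#include <math.h>
static float
swsqrt_model (float a, int recip)
{
  float x0 = 1.0f / sqrtf (a);            /* rsqrtss estimate */
  float e0 = x0 * a;                      /* e0 = a * x0 */
  float e1 = e0 * x0;                     /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;                   /* e2 = a*x0*x0 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f;   /* -.5*x0 (rsqrt) or -.5*a*x0 */
  return e2 * e3;                         /* rsqrt(a) or sqrt(a) */
}
#endif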
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
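
/* For illustration only: why the 2**52 (2**23 for SFmode) constant
   works, as scalar C (not GCC code; the name is invented).  Adding and
   subtracting 2**p, where p is the mantissa width, forces all fraction
   bits out of the significand, so the result is the input rounded to an
   integer in the current rounding mode.  The |x| < 2**p guard is what
   the UNLE compare-and-jump in the expanders below implements.  */
#if 0
static double
rint_model (double x)  /* assumes |x| < 2**52 and default rounding */
{
  const double TWO52 = 4503599627370496.0;  /* 2**52 */
  volatile double t = x + TWO52;  /* volatile keeps the add from folding */
  return t - TWO52;               /* integral; sign of -0.0 is lost */
}
#endif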
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;  */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
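
/* For illustration only: why nextafter (0.5, 0.0) rather than 0.5, as
   scalar C (not GCC code; the name is invented).  With adj = 0.5
   exactly, x + adj can round up across an integer boundary for x just
   below 0.5 (e.g. the largest double less than 0.5), giving 1 instead
   of 0; the predecessor of 0.5 avoids that while still pushing every
   |x| >= 0.5 over the boundary.  */
#if 0
#include <math.h>
static long
lround_model (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);  /* cvttsd2si-style truncation toward zero */
}
#endif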
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        op0 = xi;  */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
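
/* For illustration only: the compensation step above as scalar C (not
   GCC code; the name is invented).  (long)x truncates toward zero, so
   for negative non-integral x it is one too high for floor; comparing
   the back-converted value against x fixes it up.  The ceil variant
   adds 1 under the mirrored condition.  */
#if 0
static long
lfloor_model (double x)
{
  long xi = (long) x;        /* expand_fix: truncate toward zero */
  double fx = (double) xi;   /* expand_float */
  if (fx > x)                /* only possible when x < 0 and fractional */
    xi -= 1;
  return xi;
}
#endif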
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
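
/* For illustration only: the SSE4.1 sequence above as scalar C (not
   GCC code; the name is invented).  As in ix86_expand_lround, the
   predecessor of 0.5 is used so that values just below 0.5 are not
   rounded away from zero by the addition itself; ROUND_TRUNC selects
   truncation in the roundsd/roundss immediate.  */
#if 0
#include <math.h>
static double
round_sse4_model (double a)
{
  double half = nextafter (0.5, 0.0);
  double e1 = copysign (half, a);  /* e1 = copysign (0.5 - ulp, a) */
  double e2 = a + e1;
  return trunc (e2);               /* roundsd with ROUND_TRUNC */
}
#endif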
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype,
                                 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
                                                        const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
                unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt,
                        bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_lowpart (vmode, target);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead of current vector mode.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
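
/* A worked example of the chunk test above, for illustration.  With
   d->vmode = V8HImode (nelt = 8) and vmode = V4SImode, chunk = 2: the
   HImode permutation { 2,3, 6,7, 0,1, 4,5 } starts every pair on an
   even element and keeps the pairs consecutive, so it is also the
   valid SImode permutation { 1, 3, 0, 2 }.  Something like
   { 1,2, ... } fails the (d->perm[i] & (chunk - 1)) check, since the
   chunk would straddle an SImode element boundary.  */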
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = gen_lowpart (V4DImode, d->target);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
                           || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  return expand_vselect (gen_lowpart (V4DImode, d->target),
                                         gen_lowpart (V4DImode, d->op0),
                                         perm, 4, false);
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V8SFmode)
            vmode = V8SImode;

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case V32QImode:
	      gen = gen_avx2_pbroadcastv32qi_1;
	      break;
	    case V16HImode:
	      gen = gen_avx2_pbroadcastv16hi_1;
	      break;
	    case V8SImode:
	      gen = gen_avx2_pbroadcastv8si_1;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    case V8SFmode:
	      gen = gen_avx2_vec_dupv8sf_1;
	      break;
	    default:
	      /* For other modes prefer other shuffles this function creates.  */
	      break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, d->op0));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				  d->testing_p))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				      d->testing_p))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
			      d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
				  d->testing_p))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
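/* Worked example of the interleave recognition above: the one-operand
   V4SImode permutation { 0, 0, 1, 1 } is rewritten as perm2
   { 0, 4, 1, 5 } applied to the concatenation of op0 with itself,
   which matches the punpckldq pattern.  */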
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
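/* E.g. { 2, 0, 3, 1, 6, 4, 7, 5 } qualifies: the pshuflw step uses
   { 2, 0, 3, 1, 4, 5, 6, 7 } and the following pshufhw step uses
   { 0, 1, 2, 3, 6, 4, 7, 5 }.  */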
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}

static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
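/* E.g. for two V16QImode operands and perm { 5, 6, ..., 20 }, min is 5,
   so the palignr shifts the op1:op0 concatenation right by 5 bytes; the
   residual permutation is then the identity and the degenerate case
   above applies, needing no pshufb at all.  */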
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->one_operand_p.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If all elements come from the low halves, use interleave low,
	 and similarly interleave high for the high halves.  If the
	 elements are from mis-matched halves, we can use shufps for
	 V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->one_operand_p);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->one_operand_p)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->one_operand_p)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
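/* Worked example: the two-operand V4SImode permutation { 1, 5, 0, 4 }
   draws only on the low halves (h1 | h3), so dremap becomes the
   punpckldq permutation { 0, 4, 1, 5 } and remap records 0->0, 4->1,
   1->2, 5->3; dfinal is then the single-insn pshufd { 2, 3, 0, 1 } on
   the interleaved vector, two insns in total.  */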
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	{
	  start_sequence ();
	  ok = expand_vec_perm_1 (&dsecond);
	  end_sequence ();
	}
      else
	ok = false;

      if (ok)
	{
	  if (d->testing_p)
	    return true;

	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  dsecond.op1 = dfirst.target;
	  if (perm >= 16)
	    dsecond.op0 = dfirst.op1;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
	return false;
    }

  return false;
}
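/* E.g. perm == 0xd encodes "low lane from lane 1, high lane from lane 3"
   of the four input lanes { op0.lo, op0.hi, op1.lo, op1.hi }; the
   resulting vperm2[fi]128 immediate ((0xd << 2) | 0xd) & 0x33 == 0x31
   selects { op0.hi, op1.hi }.  */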
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (which)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
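/* Worked example: extracting the even halfwords of two V8HImode operands
   (perm { 0, 2, 4, 6, 8, 10, 12, 14 }) builds the byte masks
   { 0,1, 4,5, 8,9, 12,13, -128 x 8 } for op0 and
   { -128 x 8, 0,1, 4,5, 8,9, 12,13 } for op1; the final por of the two
   pshufb results yields the desired vector.  */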
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The other mask has non-negative elements if the element is
     requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_lowpart (V4DFmode, d->target);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_lowpart (V8SFmode, d->target);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
					   gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
					  gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  rtx dest;
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
			   d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The other mask has non-negative elements if the element is
     requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    if (h[i] && l[i])
      {
	op = gen_reg_rtx (V32QImode);
	emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	l[i] = op;
      }
    else if (h[i])
      l[i] = h[i];

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly...  */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
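/* E.g. if op0 and op1 are the same register and the selector for
   V4SImode is { 0, 5, 2, 7 }, WHICH is 3 but the permutation is folded
   into the one-operand form { 0, 1, 2, 3 }; the function still returns
   true so the caller can retry with the unfolded selector if the
   folded form fails to expand.  */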
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
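/* E.g. a V16QImode MULT is performed as two V8HImode multiplies on the
   interleaved halves; the merge permutation then picks the even bytes
   { 0, 2, ..., 30 }, each of which holds the low byte of one 16-bit
   product.  */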
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx op1_m1, op1_m2;
  rtx op2_m1, op2_m2;
  rtx res_1, res_2;

  /* Shift both input vectors down one element, so that elements 3
     and 1 are now in the slots for elements 2 and 0.  For K8, at
     least, this is faster than using a shuffle.  */
  op1_m1 = op1 = force_reg (V4SImode, op1);
  op1_m2 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, op1_m2),
				 gen_lowpart (V1TImode, op1),
				 GEN_INT (32)));

  if (GET_CODE (op2) == CONST_VECTOR)
    {
      rtvec v;

      /* Constant propagate the vector shift, leaving the dont-care
	 vector elements as zero.  */
      v = rtvec_alloc (4);
      RTVEC_ELT (v, 0) = CONST_VECTOR_ELT (op2, 0);
      RTVEC_ELT (v, 2) = CONST_VECTOR_ELT (op2, 2);
      RTVEC_ELT (v, 1) = const0_rtx;
      RTVEC_ELT (v, 3) = const0_rtx;
      op2_m1 = gen_rtx_CONST_VECTOR (V4SImode, v);
      op2_m1 = force_reg (V4SImode, op2_m1);

      v = rtvec_alloc (4);
      RTVEC_ELT (v, 0) = CONST_VECTOR_ELT (op2, 1);
      RTVEC_ELT (v, 2) = CONST_VECTOR_ELT (op2, 3);
      RTVEC_ELT (v, 1) = const0_rtx;
      RTVEC_ELT (v, 3) = const0_rtx;
      op2_m2 = gen_rtx_CONST_VECTOR (V4SImode, v);
      op2_m2 = force_reg (V4SImode, op2_m2);
    }
  else
    {
      op2_m1 = op2 = force_reg (V4SImode, op2);
      op2_m2 = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, op2_m2),
				     gen_lowpart (V1TImode, op2),
				     GEN_INT (32)));
    }

  /* Widening multiply of elements 0+2, and 1+3.  */
  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, res_1),
				     op1_m1, op2_m1));
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, res_2),
				     op1_m2, op2_m2));

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
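/* Summary of the sequence above for op1 = { a0, a1, a2, a3 } and
   op2 = { b0, b1, b2, b3 }: res_1 holds the 64-bit products a0*b0 and
   a2*b2, res_2 holds a1*b1 and a3*b3; the two pshufd insns move the low
   32 bits of each product into elements 0 and 1, and the final
   punpckldq produces { a0*b0, a1*b1, a2*b2, a3*b3 } mod 2^32.  */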
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	dst = gen_lowpart (dstmode, dst);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
	return true;
      }

    default:
      return false;
    }
}
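/* E.g. inserting an HImode value at bit position 48 of a V2DImode
   register reinterprets the destination as V8HImode; pos becomes
   48 / 16 == 3 and the emitted insn is pinsrw with selector 1 << 3.  */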
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#undef BIG
#define BIG 100
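/* Taken together, the limits above mean one dispatch window may hold
   at most 16 bytes of object code, 2 loads or prefetches, 1 store, and
   immediate operands totalling at most 128 bits (at most four 32-bit
   or two 64-bit immediates).  */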
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};
38985 /* Instruction path. */
38988 path_single
, /* Single micro op. */
38989 path_double
, /* Double micro op. */
38990 path_multi
, /* Instructions with more than 2 micro op.. */
38994 /* sched_insn_info defines a window to the instructions scheduled in
38995 the basic block. It contains a pointer to the insn_info table and
38996 the instruction scheduled.
38998 Windows are allocated for each basic block and are linked
39000 typedef struct sched_insn_info_s
{
39002 enum dispatch_group group
;
39003 enum insn_path path
;
/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, either 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container
   holding a pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic
   block does not have to be a branch, but here we assume only branches
   end a window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is
   reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not
   dispatch windows of size DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with
   number of corresponding immediate-operands.  It initializes its
   parameters to zero before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of
   64 bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
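/* Worked example (hypothetical helper, not part of the original file):
   an insn carrying one 32-bit and one 64-bit immediate yields
   *imm = 2, *imm32 = 1, *imm64 = 1 and a return value of
   1*4 + 1*8 = 12 bytes.  The helper below restates the size
   arithmetic used above.  */

static inline int ATTRIBUTE_UNUSED
imm_bytes_example (int n_imm32, int n_imm64)
{
  /* Each 32-bit immediate occupies 4 bytes, each 64-bit one 8.  */
  return n_imm32 * 4 + n_imm64 * 8;
}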
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
                                     &num_imm32_operand, &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand
                     > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2
                     > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block processing.  */
      process_end_window ();
      return;
    }
}
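/* Illustrative note (not part of the original file): the expression
   window_num = ~window_num & 1 above toggles between the two window
   numbers and is equivalent to window_num ^ 1, since

     ~0 & 1 == 1  and  ~1 & 1 == 0.  */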
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
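/* Illustrative note (not part of the original file): the hook's return
   value is a bitmask of vector sizes in bytes, so with AVX enabled
   both 32-byte (256-bit) and 16-byte (128-bit) vectors are offered:

     (32 | 16) == 48, i.e. the bits for sizes 32 and 16 are set.

   Returning 0 leaves the choice to the preferred SIMD mode computed
   by ix86_preferred_simd_mode alone.  */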
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  unsigned HOST_WIDE_INT strong;

  if (val & ~(unsigned HOST_WIDE_INT) (IX86_HLE_ACQUIRE | IX86_HLE_RELEASE
                                       | MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
               "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
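/* Illustrative user-level sketch (not part of this file): the HLE bits
   validated above originate from source such as the following spin
   lock, where __ATOMIC_HLE_ACQUIRE must accompany an ACQUIRE or
   stronger model and __ATOMIC_HLE_RELEASE a RELEASE or stronger one:

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
*/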
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"