1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
55 #include "tm-constrs.h"
59 #include "sched-int.h"
63 #include "diagnostic.h"
/* Per-basic-block bookkeeping for the vzeroupper optimization pass.
   NOTE(review): damaged extraction -- the body of enum upper_128bits_state,
   several struct members, and the braces are missing from this chunk (the
   fused original line numbers jump).  Restore from the original file.  */
66 enum upper_128bits_state
73 typedef struct block_info_def
75 /* State of the upper 128bits of AVX registers at exit. */
76 enum upper_128bits_state state
;
77 /* TRUE if state of the upper 128bits of AVX registers is unchanged
   by this block.  */
80 /* TRUE if block has been processed. */
82 /* TRUE if block has been scanned. */
84 /* Previous state of the upper 128bits of AVX registers at entry. */
85 enum upper_128bits_state prev
;
/* Retrieve the block_info_def hung off a basic block's AUX field.  */
88 #define BLOCK_INFO(B) ((block_info) (B)->aux)
/* How a call site interacts with 256-bit AVX registers; consumed when
   processing vzeroupper intrinsics below (see avx256 == callee_* tests).
   NOTE(review): damaged extraction -- the enum's braces and some enumerators
   (e.g. callee_pass_avx256, which is referenced later in this file) are
   missing from this chunk.  */
90 enum call_avx256_state
92 /* Callee returns 256bit AVX register. */
93 callee_return_avx256
= -1,
94 /* Callee returns and passes 256bit AVX register. */
95 callee_return_pass_avx256
,
96 /* Callee passes 256bit AVX register. */
98 /* Callee doesn't return nor pass 256bit AVX register, or no
99 256bit AVX register in function return. */
101 /* vzeroupper intrinsic. */
/* note_stores callback (registered in move_or_delete_vzeroupper_2): DATA
   points at an enum upper_128bits_state which is flagged when a store
   references a 256-bit AVX register, either in DEST or in SET_SRC (SET).
   NOTE(review): damaged extraction -- the return type, opening brace, the
   start of the condition and the assignment through *state are missing
   from this chunk.  */
105 /* Check if a 256bit AVX register is referenced in stores. */
108 check_avx256_stores (rtx dest
, const_rtx set
, void *data
)
111 && VALID_AVX256_REG_MODE (GET_MODE (dest
)))
112 || (GET_CODE (set
) == SET
113 && REG_P (SET_SRC (set
))
114 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set
)))))
116 enum upper_128bits_state
*state
117 = (enum upper_128bits_state
*) data
;
/* NOTE(review): damaged extraction -- braces, local declarations
   (insn, bb_end, pat, avx256, unchanged) and many statements are missing
   (the fused original line numbers jump).  Only comments were added or
   completed below; restore the code from the original file.  */
122 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
123 in basic block BB. Delete it if upper 128bit AVX registers are
124 unused. If it isn't deleted, move it to just before a jump insn.
126 STATE is state of the upper 128bits of AVX registers at entry. */
129 move_or_delete_vzeroupper_2 (basic_block bb
,
130 enum upper_128bits_state state
)
133 rtx vzeroupper_insn
= NULL_RTX
;
/* Fast paths: reuse the cached result when this block was already seen.  */
138 if (BLOCK_INFO (bb
)->unchanged
)
141 fprintf (dump_file
, " [bb %i] unchanged: upper 128bits: %d\n",
144 BLOCK_INFO (bb
)->state
= state
;
148 if (BLOCK_INFO (bb
)->scanned
&& BLOCK_INFO (bb
)->prev
== state
)
151 fprintf (dump_file
, " [bb %i] scanned: upper 128bits: %d\n",
152 bb
->index
, BLOCK_INFO (bb
)->state
);
156 BLOCK_INFO (bb
)->prev
= state
;
159 fprintf (dump_file
, " [bb %i] entry: upper 128bits: %d\n",
164 /* BB_END changes when it is deleted. */
165 bb_end
= BB_END (bb
);
/* Walk each insn of BB up to (and including) the cached BB end.  */
167 while (insn
!= bb_end
)
169 insn
= NEXT_INSN (insn
);
171 if (!NONDEBUG_INSN_P (insn
))
174 /* Move vzeroupper before jump/call. */
175 if (JUMP_P (insn
) || CALL_P (insn
))
177 if (!vzeroupper_insn
)
180 if (PREV_INSN (insn
) != vzeroupper_insn
)
184 fprintf (dump_file
, "Move vzeroupper after:\n");
185 print_rtl_single (dump_file
, PREV_INSN (insn
));
186 fprintf (dump_file
, "before:\n");
187 print_rtl_single (dump_file
, insn
);
189 reorder_insns_nobb (vzeroupper_insn
, vzeroupper_insn
,
192 vzeroupper_insn
= NULL_RTX
;
196 pat
= PATTERN (insn
);
198 /* Check insn for vzeroupper intrinsic. */
199 if (GET_CODE (pat
) == UNSPEC_VOLATILE
200 && XINT (pat
, 1) == UNSPECV_VZEROUPPER
)
204 /* Found vzeroupper intrinsic. */
205 fprintf (dump_file
, "Found vzeroupper:\n");
206 print_rtl_single (dump_file
, insn
);
211 /* Check insn for vzeroall intrinsic. */
212 if (GET_CODE (pat
) == PARALLEL
213 && GET_CODE (XVECEXP (pat
, 0, 0)) == UNSPEC_VOLATILE
214 && XINT (XVECEXP (pat
, 0, 0), 1) == UNSPECV_VZEROALL
)
219 /* Delete pending vzeroupper insertion. */
222 delete_insn (vzeroupper_insn
);
223 vzeroupper_insn
= NULL_RTX
;
/* Otherwise scan the stores; check_avx256_stores may flip STATE.  */
226 else if (state
!= used
)
228 note_stores (pat
, check_avx256_stores
, &state
);
235 /* Process vzeroupper intrinsic. */
236 avx256
= INTVAL (XVECEXP (pat
, 0, 0));
240 /* Since the upper 128bits are cleared, callee must not pass
241 256bit AVX register. We only need to check if callee
242 returns 256bit AVX register. */
243 if (avx256
== callee_return_avx256
)
249 /* Remove unnecessary vzeroupper since upper 128bits are
   not live afterwards.  */
253 fprintf (dump_file
, "Delete redundant vzeroupper:\n");
254 print_rtl_single (dump_file
, insn
);
260 /* Set state to UNUSED if callee doesn't return 256bit AVX
   register.  */
262 if (avx256
!= callee_return_pass_avx256
)
265 if (avx256
== callee_return_pass_avx256
266 || avx256
== callee_pass_avx256
)
268 /* Must remove vzeroupper since callee passes in 256bit
   AVX registers.  */
272 fprintf (dump_file
, "Delete callee pass vzeroupper:\n");
273 print_rtl_single (dump_file
, insn
);
279 vzeroupper_insn
= insn
;
/* Publish the results of this scan into the block's aux info.  */
285 BLOCK_INFO (bb
)->state
= state
;
286 BLOCK_INFO (bb
)->unchanged
= unchanged
;
287 BLOCK_INFO (bb
)->scanned
= true;
290 fprintf (dump_file
, " [bb %i] exit: %s: upper 128bits: %d\n",
291 bb
->index
, unchanged
? "unchanged" : "changed",
/* NOTE(review): damaged extraction -- braces, the function's return type,
   locals (e, ei, seen_unknown), switch cases and several statements are
   missing (the fused original line numbers jump).  Only comments were
   added or completed; restore the code from the original file.  */
295 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
296 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
297 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
   state of BLOCK changed.  */
301 move_or_delete_vzeroupper_1 (basic_block block
, bool unknown_is_unused
)
305 enum upper_128bits_state state
, old_state
, new_state
;
309 fprintf (dump_file
, " Process [bb %i]: status: %d\n",
310 block
->index
, BLOCK_INFO (block
)->processed
);
312 if (BLOCK_INFO (block
)->processed
)
317 /* Check all predecessor edges of this block. */
318 seen_unknown
= false;
319 FOR_EACH_EDGE (e
, ei
, block
->preds
)
323 switch (BLOCK_INFO (e
->src
)->state
)
326 if (!unknown_is_unused
)
/* Scan BLOCK with the state merged from its predecessors and compare
   the exit state before and after.  */
340 old_state
= BLOCK_INFO (block
)->state
;
341 move_or_delete_vzeroupper_2 (block
, state
);
342 new_state
= BLOCK_INFO (block
)->state
;
344 if (state
!= unknown
|| new_state
== used
)
345 BLOCK_INFO (block
)->processed
= true;
347 /* Need to rescan if the upper 128bits of AVX registers are changed
   to USED at exit.  */
349 if (new_state
!= old_state
)
351 if (new_state
== used
)
352 cfun
->machine
->rescan_vzeroupper_p
= 1;
/* NOTE(review): damaged extraction -- braces, the return type, locals
   (bb, e, ei, i, rc_order, bb_order), loop headers (FOR_EACH_BB etc.) and
   several statements are missing (the fused original line numbers jump).
   Only comments were added; restore the code from the original file.
   Structure grounded in the visible code: a fibheap-driven worklist
   dataflow in reverse-completion order, iterated until
   cfun->machine->rescan_vzeroupper_p stays clear.  */
359 /* Go through the instruction stream looking for vzeroupper. Delete
360 it if upper 128bit AVX registers are unused. If it isn't deleted,
361 move it to just before a jump insn. */
364 move_or_delete_vzeroupper (void)
369 fibheap_t worklist
, pending
, fibheap_swap
;
370 sbitmap visited
, in_worklist
, in_pending
, sbitmap_swap
;
375 /* Set up block info for each basic block. */
376 alloc_aux_for_blocks (sizeof (struct block_info_def
));
378 /* Process outgoing edges of entry point. */
380 fprintf (dump_file
, "Process outgoing edges of entry point\n");
382 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
384 move_or_delete_vzeroupper_2 (e
->dest
,
385 cfun
->machine
->caller_pass_avx256_p
387 BLOCK_INFO (e
->dest
)->processed
= true;
390 /* Compute reverse completion order of depth first search of the CFG
391 so that the data-flow runs faster. */
392 rc_order
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
393 bb_order
= XNEWVEC (int, last_basic_block
);
394 pre_and_rev_post_order_compute (NULL
, rc_order
, false);
395 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
396 bb_order
[rc_order
[i
]] = i
;
/* Allocate the worklist machinery: two heaps plus membership bitmaps.  */
399 worklist
= fibheap_new ();
400 pending
= fibheap_new ();
401 visited
= sbitmap_alloc (last_basic_block
);
402 in_worklist
= sbitmap_alloc (last_basic_block
);
403 in_pending
= sbitmap_alloc (last_basic_block
);
404 sbitmap_zero (in_worklist
);
406 /* Don't check outgoing edges of entry point. */
407 sbitmap_ones (in_pending
);
409 if (BLOCK_INFO (bb
)->processed
)
410 RESET_BIT (in_pending
, bb
->index
);
413 move_or_delete_vzeroupper_1 (bb
, false);
414 fibheap_insert (pending
, bb_order
[bb
->index
], bb
);
418 fprintf (dump_file
, "Check remaining basic blocks\n");
/* Outer iteration: swap PENDING into WORKLIST and drain it; repeat while
   new work keeps arriving.  */
420 while (!fibheap_empty (pending
))
422 fibheap_swap
= pending
;
424 worklist
= fibheap_swap
;
425 sbitmap_swap
= in_pending
;
426 in_pending
= in_worklist
;
427 in_worklist
= sbitmap_swap
;
429 sbitmap_zero (visited
);
431 cfun
->machine
->rescan_vzeroupper_p
= 0;
433 while (!fibheap_empty (worklist
))
435 bb
= (basic_block
) fibheap_extract_min (worklist
);
436 RESET_BIT (in_worklist
, bb
->index
);
437 gcc_assert (!TEST_BIT (visited
, bb
->index
));
438 if (!TEST_BIT (visited
, bb
->index
))
442 SET_BIT (visited
, bb
->index
);
444 if (move_or_delete_vzeroupper_1 (bb
, false))
445 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
447 if (e
->dest
== EXIT_BLOCK_PTR
448 || BLOCK_INFO (e
->dest
)->processed
)
451 if (TEST_BIT (visited
, e
->dest
->index
))
453 if (!TEST_BIT (in_pending
, e
->dest
->index
))
455 /* Send E->DEST to next round. */
456 SET_BIT (in_pending
, e
->dest
->index
);
457 fibheap_insert (pending
,
458 bb_order
[e
->dest
->index
],
462 else if (!TEST_BIT (in_worklist
, e
->dest
->index
))
464 /* Add E->DEST to current round. */
465 SET_BIT (in_worklist
, e
->dest
->index
);
466 fibheap_insert (worklist
, bb_order
[e
->dest
->index
],
473 if (!cfun
->machine
->rescan_vzeroupper_p
)
/* Dataflow converged: release the worklist machinery.  */
478 fibheap_delete (worklist
);
479 fibheap_delete (pending
);
480 sbitmap_free (visited
);
481 sbitmap_free (in_worklist
);
482 sbitmap_free (in_pending
);
485 fprintf (dump_file
, "Process remaining basic blocks\n");
488 move_or_delete_vzeroupper_1 (bb
, true);
490 free_aux_for_blocks ();
/* Forward declaration of the dllimport symbol legitimizer defined later
   in this file.
   NOTE(review): damaged extraction in this region -- the #endif matching
   the #ifndef below and the tail of MODE_INDEX (its final arm and closing
   parenthesis) are missing from this chunk; restore from the original.  */
493 static rtx
legitimize_dllimport_symbol (rtx
, bool);
495 #ifndef CHECK_STACK_LIMIT
496 #define CHECK_STACK_LIMIT (-1)
499 /* Return index of given mode in mult and division cost tables. */
500 #define MODE_INDEX(mode) \
501 ((mode) == QImode ? 0 \
502 : (mode) == HImode ? 1 \
503 : (mode) == SImode ? 2 \
504 : (mode) == DImode ? 3 \
507 /* Processor costs (relative to an add) */
508 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
509 #define COSTS_N_BYTES(N) ((N) * 2)
511 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size; entries are COSTS_N_BYTES.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
514 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
515 COSTS_N_BYTES (2), /* cost of an add instruction */
516 COSTS_N_BYTES (3), /* cost of a lea instruction */
517 COSTS_N_BYTES (2), /* variable shift costs */
518 COSTS_N_BYTES (3), /* constant shift costs */
519 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
520 COSTS_N_BYTES (3), /* HI */
521 COSTS_N_BYTES (3), /* SI */
522 COSTS_N_BYTES (3), /* DI */
523 COSTS_N_BYTES (5)}, /* other */
524 0, /* cost of multiply per each bit set */
525 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
526 COSTS_N_BYTES (3), /* HI */
527 COSTS_N_BYTES (3), /* SI */
528 COSTS_N_BYTES (3), /* DI */
529 COSTS_N_BYTES (5)}, /* other */
530 COSTS_N_BYTES (3), /* cost of movsx */
531 COSTS_N_BYTES (3), /* cost of movzx */
532 0, /* "large" insn */
534 2, /* cost for loading QImode using movzbl */
535 {2, 2, 2}, /* cost of loading integer registers
536 in QImode, HImode and SImode.
537 Relative to reg-reg move (2). */
538 {2, 2, 2}, /* cost of storing integer registers */
539 2, /* cost of reg,reg fld/fst */
540 {2, 2, 2}, /* cost of loading fp registers
541 in SFmode, DFmode and XFmode */
542 {2, 2, 2}, /* cost of storing fp registers
543 in SFmode, DFmode and XFmode */
544 3, /* cost of moving MMX register */
545 {3, 3}, /* cost of loading MMX registers
546 in SImode and DImode */
547 {3, 3}, /* cost of storing MMX registers
548 in SImode and DImode */
549 3, /* cost of moving SSE register */
550 {3, 3, 3}, /* cost of loading SSE registers
551 in SImode, DImode and TImode */
552 {3, 3, 3}, /* cost of storing SSE registers
553 in SImode, DImode and TImode */
554 3, /* MMX or SSE register to integer */
555 0, /* size of l1 cache */
556 0, /* size of l2 cache */
557 0, /* size of prefetch block */
558 0, /* number of parallel prefetches */
560 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
562 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
563 COSTS_N_BYTES (2), /* cost of FABS instruction. */
564 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
565 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
566 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
567 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
568 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
569 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 1, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 1, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
/* 386-specific cost table; entries are COSTS_N_INSNS.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
583 /* Processor costs (relative to an add) */
585 struct processor_costs i386_cost
= { /* 386 specific costs */
586 COSTS_N_INSNS (1), /* cost of an add instruction */
587 COSTS_N_INSNS (1), /* cost of a lea instruction */
588 COSTS_N_INSNS (3), /* variable shift costs */
589 COSTS_N_INSNS (2), /* constant shift costs */
590 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
591 COSTS_N_INSNS (6), /* HI */
592 COSTS_N_INSNS (6), /* SI */
593 COSTS_N_INSNS (6), /* DI */
594 COSTS_N_INSNS (6)}, /* other */
595 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
596 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
597 COSTS_N_INSNS (23), /* HI */
598 COSTS_N_INSNS (23), /* SI */
599 COSTS_N_INSNS (23), /* DI */
600 COSTS_N_INSNS (23)}, /* other */
601 COSTS_N_INSNS (3), /* cost of movsx */
602 COSTS_N_INSNS (2), /* cost of movzx */
603 15, /* "large" insn */
605 4, /* cost for loading QImode using movzbl */
606 {2, 4, 2}, /* cost of loading integer registers
607 in QImode, HImode and SImode.
608 Relative to reg-reg move (2). */
609 {2, 4, 2}, /* cost of storing integer registers */
610 2, /* cost of reg,reg fld/fst */
611 {8, 8, 8}, /* cost of loading fp registers
612 in SFmode, DFmode and XFmode */
613 {8, 8, 8}, /* cost of storing fp registers
614 in SFmode, DFmode and XFmode */
615 2, /* cost of moving MMX register */
616 {4, 8}, /* cost of loading MMX registers
617 in SImode and DImode */
618 {4, 8}, /* cost of storing MMX registers
619 in SImode and DImode */
620 2, /* cost of moving SSE register */
621 {4, 8, 16}, /* cost of loading SSE registers
622 in SImode, DImode and TImode */
623 {4, 8, 16}, /* cost of storing SSE registers
624 in SImode, DImode and TImode */
625 3, /* MMX or SSE register to integer */
626 0, /* size of l1 cache */
627 0, /* size of l2 cache */
628 0, /* size of prefetch block */
629 0, /* number of parallel prefetches */
631 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
632 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
633 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
634 COSTS_N_INSNS (22), /* cost of FABS instruction. */
635 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
636 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
637 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
638 DUMMY_STRINGOP_ALGS
},
639 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
640 DUMMY_STRINGOP_ALGS
},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
651 1, /* cond_not_taken_branch_cost. */
/* 486-specific cost table.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
655 struct processor_costs i486_cost
= { /* 486 specific costs */
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (1), /* cost of a lea instruction */
658 COSTS_N_INSNS (3), /* variable shift costs */
659 COSTS_N_INSNS (2), /* constant shift costs */
660 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (12), /* HI */
662 COSTS_N_INSNS (12), /* SI */
663 COSTS_N_INSNS (12), /* DI */
664 COSTS_N_INSNS (12)}, /* other */
665 1, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (40), /* HI */
668 COSTS_N_INSNS (40), /* SI */
669 COSTS_N_INSNS (40), /* DI */
670 COSTS_N_INSNS (40)}, /* other */
671 COSTS_N_INSNS (3), /* cost of movsx */
672 COSTS_N_INSNS (2), /* cost of movzx */
673 15, /* "large" insn */
675 4, /* cost for loading QImode using movzbl */
676 {2, 4, 2}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {2, 4, 2}, /* cost of storing integer registers */
680 2, /* cost of reg,reg fld/fst */
681 {8, 8, 8}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {8, 8, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 8}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 8}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 8, 16}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 8, 16}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 3, /* MMX or SSE register to integer */
696 4, /* size of l1 cache. 486 has 8kB cache
697 shared for code and data, so 4kB is
698 not really precise. */
699 4, /* size of l2 cache */
700 0, /* size of prefetch block */
701 0, /* number of parallel prefetches */
703 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
704 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
705 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
706 COSTS_N_INSNS (3), /* cost of FABS instruction. */
707 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
708 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
709 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
710 DUMMY_STRINGOP_ALGS
},
711 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
712 DUMMY_STRINGOP_ALGS
},
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
/* Pentium cost table.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
727 struct processor_costs pentium_cost
= {
728 COSTS_N_INSNS (1), /* cost of an add instruction */
729 COSTS_N_INSNS (1), /* cost of a lea instruction */
730 COSTS_N_INSNS (4), /* variable shift costs */
731 COSTS_N_INSNS (1), /* constant shift costs */
732 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
733 COSTS_N_INSNS (11), /* HI */
734 COSTS_N_INSNS (11), /* SI */
735 COSTS_N_INSNS (11), /* DI */
736 COSTS_N_INSNS (11)}, /* other */
737 0, /* cost of multiply per each bit set */
738 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
739 COSTS_N_INSNS (25), /* HI */
740 COSTS_N_INSNS (25), /* SI */
741 COSTS_N_INSNS (25), /* DI */
742 COSTS_N_INSNS (25)}, /* other */
743 COSTS_N_INSNS (3), /* cost of movsx */
744 COSTS_N_INSNS (2), /* cost of movzx */
745 8, /* "large" insn */
747 6, /* cost for loading QImode using movzbl */
748 {2, 4, 2}, /* cost of loading integer registers
749 in QImode, HImode and SImode.
750 Relative to reg-reg move (2). */
751 {2, 4, 2}, /* cost of storing integer registers */
752 2, /* cost of reg,reg fld/fst */
753 {2, 2, 6}, /* cost of loading fp registers
754 in SFmode, DFmode and XFmode */
755 {4, 4, 6}, /* cost of storing fp registers
756 in SFmode, DFmode and XFmode */
757 8, /* cost of moving MMX register */
758 {8, 8}, /* cost of loading MMX registers
759 in SImode and DImode */
760 {8, 8}, /* cost of storing MMX registers
761 in SImode and DImode */
762 2, /* cost of moving SSE register */
763 {4, 8, 16}, /* cost of loading SSE registers
764 in SImode, DImode and TImode */
765 {4, 8, 16}, /* cost of storing SSE registers
766 in SImode, DImode and TImode */
767 3, /* MMX or SSE register to integer */
768 8, /* size of l1 cache. */
769 8, /* size of l2 cache */
770 0, /* size of prefetch block */
771 0, /* number of parallel prefetches */
773 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
774 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
775 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
776 COSTS_N_INSNS (1), /* cost of FABS instruction. */
777 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
778 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
779 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
780 DUMMY_STRINGOP_ALGS
},
781 {{libcall
, {{-1, rep_prefix_4_byte
}}},
782 DUMMY_STRINGOP_ALGS
},
783 1, /* scalar_stmt_cost. */
784 1, /* scalar load_cost. */
785 1, /* scalar_store_cost. */
786 1, /* vec_stmt_cost. */
787 1, /* vec_to_scalar_cost. */
788 1, /* scalar_to_vec_cost. */
789 1, /* vec_align_load_cost. */
790 2, /* vec_unalign_load_cost. */
791 1, /* vec_store_cost. */
792 3, /* cond_taken_branch_cost. */
793 1, /* cond_not_taken_branch_cost. */
/* PentiumPro cost table.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
797 struct processor_costs pentiumpro_cost
= {
798 COSTS_N_INSNS (1), /* cost of an add instruction */
799 COSTS_N_INSNS (1), /* cost of a lea instruction */
800 COSTS_N_INSNS (1), /* variable shift costs */
801 COSTS_N_INSNS (1), /* constant shift costs */
802 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
803 COSTS_N_INSNS (4), /* HI */
804 COSTS_N_INSNS (4), /* SI */
805 COSTS_N_INSNS (4), /* DI */
806 COSTS_N_INSNS (4)}, /* other */
807 0, /* cost of multiply per each bit set */
808 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
809 COSTS_N_INSNS (17), /* HI */
810 COSTS_N_INSNS (17), /* SI */
811 COSTS_N_INSNS (17), /* DI */
812 COSTS_N_INSNS (17)}, /* other */
813 COSTS_N_INSNS (1), /* cost of movsx */
814 COSTS_N_INSNS (1), /* cost of movzx */
815 8, /* "large" insn */
817 2, /* cost for loading QImode using movzbl */
818 {4, 4, 4}, /* cost of loading integer registers
819 in QImode, HImode and SImode.
820 Relative to reg-reg move (2). */
821 {2, 2, 2}, /* cost of storing integer registers */
822 2, /* cost of reg,reg fld/fst */
823 {2, 2, 6}, /* cost of loading fp registers
824 in SFmode, DFmode and XFmode */
825 {4, 4, 6}, /* cost of storing fp registers
826 in SFmode, DFmode and XFmode */
827 2, /* cost of moving MMX register */
828 {2, 2}, /* cost of loading MMX registers
829 in SImode and DImode */
830 {2, 2}, /* cost of storing MMX registers
831 in SImode and DImode */
832 2, /* cost of moving SSE register */
833 {2, 2, 8}, /* cost of loading SSE registers
834 in SImode, DImode and TImode */
835 {2, 2, 8}, /* cost of storing SSE registers
836 in SImode, DImode and TImode */
837 3, /* MMX or SSE register to integer */
838 8, /* size of l1 cache. */
839 256, /* size of l2 cache */
840 32, /* size of prefetch block */
841 6, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (2), /* cost of FABS instruction. */
847 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
849 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
850 (we ensure the alignment). For small blocks inline loop is still a
851 noticeable win, for bigger blocks either rep movsl or rep movsb is
852 way to go. Rep movsb has apparently more expensive startup time in CPU,
853 but after 4K the difference is down in the noise. */
854 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
855 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
856 DUMMY_STRINGOP_ALGS
},
857 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
858 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
859 DUMMY_STRINGOP_ALGS
},
860 1, /* scalar_stmt_cost. */
861 1, /* scalar load_cost. */
862 1, /* scalar_store_cost. */
863 1, /* vec_stmt_cost. */
864 1, /* vec_to_scalar_cost. */
865 1, /* scalar_to_vec_cost. */
866 1, /* vec_align_load_cost. */
867 2, /* vec_unalign_load_cost. */
868 1, /* vec_store_cost. */
869 3, /* cond_taken_branch_cost. */
870 1, /* cond_not_taken_branch_cost. */
/* AMD Geode cost table.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
874 struct processor_costs geode_cost
= {
875 COSTS_N_INSNS (1), /* cost of an add instruction */
876 COSTS_N_INSNS (1), /* cost of a lea instruction */
877 COSTS_N_INSNS (2), /* variable shift costs */
878 COSTS_N_INSNS (1), /* constant shift costs */
879 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
880 COSTS_N_INSNS (4), /* HI */
881 COSTS_N_INSNS (7), /* SI */
882 COSTS_N_INSNS (7), /* DI */
883 COSTS_N_INSNS (7)}, /* other */
884 0, /* cost of multiply per each bit set */
885 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
886 COSTS_N_INSNS (23), /* HI */
887 COSTS_N_INSNS (39), /* SI */
888 COSTS_N_INSNS (39), /* DI */
889 COSTS_N_INSNS (39)}, /* other */
890 COSTS_N_INSNS (1), /* cost of movsx */
891 COSTS_N_INSNS (1), /* cost of movzx */
892 8, /* "large" insn */
894 1, /* cost for loading QImode using movzbl */
895 {1, 1, 1}, /* cost of loading integer registers
896 in QImode, HImode and SImode.
897 Relative to reg-reg move (2). */
898 {1, 1, 1}, /* cost of storing integer registers */
899 1, /* cost of reg,reg fld/fst */
900 {1, 1, 1}, /* cost of loading fp registers
901 in SFmode, DFmode and XFmode */
902 {4, 6, 6}, /* cost of storing fp registers
903 in SFmode, DFmode and XFmode */
905 1, /* cost of moving MMX register */
906 {1, 1}, /* cost of loading MMX registers
907 in SImode and DImode */
908 {1, 1}, /* cost of storing MMX registers
909 in SImode and DImode */
910 1, /* cost of moving SSE register */
911 {1, 1, 1}, /* cost of loading SSE registers
912 in SImode, DImode and TImode */
913 {1, 1, 1}, /* cost of storing SSE registers
914 in SImode, DImode and TImode */
915 1, /* MMX or SSE register to integer */
916 64, /* size of l1 cache. */
917 128, /* size of l2 cache. */
918 32, /* size of prefetch block */
919 1, /* number of parallel prefetches */
921 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
922 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
923 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
924 COSTS_N_INSNS (1), /* cost of FABS instruction. */
925 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
926 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
927 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
928 DUMMY_STRINGOP_ALGS
},
929 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
930 DUMMY_STRINGOP_ALGS
},
931 1, /* scalar_stmt_cost. */
932 1, /* scalar load_cost. */
933 1, /* scalar_store_cost. */
934 1, /* vec_stmt_cost. */
935 1, /* vec_to_scalar_cost. */
936 1, /* scalar_to_vec_cost. */
937 1, /* vec_align_load_cost. */
938 2, /* vec_unalign_load_cost. */
939 1, /* vec_store_cost. */
940 3, /* cond_taken_branch_cost. */
941 1, /* cond_not_taken_branch_cost. */
/* AMD K6 cost table.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
945 struct processor_costs k6_cost
= {
946 COSTS_N_INSNS (1), /* cost of an add instruction */
947 COSTS_N_INSNS (2), /* cost of a lea instruction */
948 COSTS_N_INSNS (1), /* variable shift costs */
949 COSTS_N_INSNS (1), /* constant shift costs */
950 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
951 COSTS_N_INSNS (3), /* HI */
952 COSTS_N_INSNS (3), /* SI */
953 COSTS_N_INSNS (3), /* DI */
954 COSTS_N_INSNS (3)}, /* other */
955 0, /* cost of multiply per each bit set */
956 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
957 COSTS_N_INSNS (18), /* HI */
958 COSTS_N_INSNS (18), /* SI */
959 COSTS_N_INSNS (18), /* DI */
960 COSTS_N_INSNS (18)}, /* other */
961 COSTS_N_INSNS (2), /* cost of movsx */
962 COSTS_N_INSNS (2), /* cost of movzx */
963 8, /* "large" insn */
965 3, /* cost for loading QImode using movzbl */
966 {4, 5, 4}, /* cost of loading integer registers
967 in QImode, HImode and SImode.
968 Relative to reg-reg move (2). */
969 {2, 3, 2}, /* cost of storing integer registers */
970 4, /* cost of reg,reg fld/fst */
971 {6, 6, 6}, /* cost of loading fp registers
972 in SFmode, DFmode and XFmode */
973 {4, 4, 4}, /* cost of storing fp registers
974 in SFmode, DFmode and XFmode */
975 2, /* cost of moving MMX register */
976 {2, 2}, /* cost of loading MMX registers
977 in SImode and DImode */
978 {2, 2}, /* cost of storing MMX registers
979 in SImode and DImode */
980 2, /* cost of moving SSE register */
981 {2, 2, 8}, /* cost of loading SSE registers
982 in SImode, DImode and TImode */
983 {2, 2, 8}, /* cost of storing SSE registers
984 in SImode, DImode and TImode */
985 6, /* MMX or SSE register to integer */
986 32, /* size of l1 cache. */
987 32, /* size of l2 cache. Some models
988 have integrated l2 cache, but
989 optimizing for k6 is not important
990 enough to worry about that. */
991 32, /* size of prefetch block */
992 1, /* number of parallel prefetches */
994 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
995 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
996 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
997 COSTS_N_INSNS (2), /* cost of FABS instruction. */
998 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
999 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
1000 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1001 DUMMY_STRINGOP_ALGS
},
1002 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1003 DUMMY_STRINGOP_ALGS
},
1004 1, /* scalar_stmt_cost. */
1005 1, /* scalar load_cost. */
1006 1, /* scalar_store_cost. */
1007 1, /* vec_stmt_cost. */
1008 1, /* vec_to_scalar_cost. */
1009 1, /* scalar_to_vec_cost. */
1010 1, /* vec_align_load_cost. */
1011 2, /* vec_unalign_load_cost. */
1012 1, /* vec_store_cost. */
1013 3, /* cond_taken_branch_cost. */
1014 1, /* cond_not_taken_branch_cost. */
/* AMD Athlon cost table.
   NOTE(review): damaged extraction -- some fields and the closing "};"
   are missing (the fused line numbers jump); restore from the original.  */
1018 struct processor_costs athlon_cost
= {
1019 COSTS_N_INSNS (1), /* cost of an add instruction */
1020 COSTS_N_INSNS (2), /* cost of a lea instruction */
1021 COSTS_N_INSNS (1), /* variable shift costs */
1022 COSTS_N_INSNS (1), /* constant shift costs */
1023 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1024 COSTS_N_INSNS (5), /* HI */
1025 COSTS_N_INSNS (5), /* SI */
1026 COSTS_N_INSNS (5), /* DI */
1027 COSTS_N_INSNS (5)}, /* other */
1028 0, /* cost of multiply per each bit set */
1029 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1030 COSTS_N_INSNS (26), /* HI */
1031 COSTS_N_INSNS (42), /* SI */
1032 COSTS_N_INSNS (74), /* DI */
1033 COSTS_N_INSNS (74)}, /* other */
1034 COSTS_N_INSNS (1), /* cost of movsx */
1035 COSTS_N_INSNS (1), /* cost of movzx */
1036 8, /* "large" insn */
1038 4, /* cost for loading QImode using movzbl */
1039 {3, 4, 3}, /* cost of loading integer registers
1040 in QImode, HImode and SImode.
1041 Relative to reg-reg move (2). */
1042 {3, 4, 3}, /* cost of storing integer registers */
1043 4, /* cost of reg,reg fld/fst */
1044 {4, 4, 12}, /* cost of loading fp registers
1045 in SFmode, DFmode and XFmode */
1046 {6, 6, 8}, /* cost of storing fp registers
1047 in SFmode, DFmode and XFmode */
1048 2, /* cost of moving MMX register */
1049 {4, 4}, /* cost of loading MMX registers
1050 in SImode and DImode */
1051 {4, 4}, /* cost of storing MMX registers
1052 in SImode and DImode */
1053 2, /* cost of moving SSE register */
1054 {4, 4, 6}, /* cost of loading SSE registers
1055 in SImode, DImode and TImode */
1056 {4, 4, 5}, /* cost of storing SSE registers
1057 in SImode, DImode and TImode */
1058 5, /* MMX or SSE register to integer */
1059 64, /* size of l1 cache. */
1060 256, /* size of l2 cache. */
1061 64, /* size of prefetch block */
1062 6, /* number of parallel prefetches */
1063 5, /* Branch cost */
1064 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1065 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1066 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1067 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1068 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1069 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1070 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1071 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1072 128 bytes for memset. */
1073 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1074 DUMMY_STRINGOP_ALGS
},
1075 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1076 DUMMY_STRINGOP_ALGS
},
1077 1, /* scalar_stmt_cost. */
1078 1, /* scalar load_cost. */
1079 1, /* scalar_store_cost. */
1080 1, /* vec_stmt_cost. */
1081 1, /* vec_to_scalar_cost. */
1082 1, /* scalar_to_vec_cost. */
1083 1, /* vec_align_load_cost. */
1084 2, /* vec_unalign_load_cost. */
1085 1, /* vec_store_cost. */
1086 3, /* cond_taken_branch_cost. */
1087 1, /* cond_not_taken_branch_cost. */
1091 struct processor_costs k8_cost
= {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (2), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (3), /* SI */
1099 COSTS_N_INSNS (4), /* DI */
1100 COSTS_N_INSNS (5)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (26), /* HI */
1104 COSTS_N_INSNS (42), /* SI */
1105 COSTS_N_INSNS (74), /* DI */
1106 COSTS_N_INSNS (74)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1111 4, /* cost for loading QImode using movzbl */
1112 {3, 4, 3}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {3, 4, 3}, /* cost of storing integer registers */
1116 4, /* cost of reg,reg fld/fst */
1117 {4, 4, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {6, 6, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {3, 3}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 3, 6}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 5}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 5, /* MMX or SSE register to integer */
1132 64, /* size of l1 cache. */
1133 512, /* size of l2 cache. */
1134 64, /* size of prefetch block */
1135 /* New AMD processors never drop prefetches; if they cannot be performed
1136 immediately, they are queued. We set number of simultaneous prefetches
1137 to a large constant to reflect this (it probably is not a good idea not
1138 to limit number of prefetches at all, as their execution also takes some
1140 100, /* number of parallel prefetches */
1141 3, /* Branch cost */
1142 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1143 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1144 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1145 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1146 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1147 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1148 /* K8 has optimized REP instruction for medium sized blocks, but for very
1149 small blocks it is better to use loop. For large blocks, libcall can
1150 do nontemporary accesses and beat inline considerably. */
1151 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1152 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1153 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1154 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1155 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1156 4, /* scalar_stmt_cost. */
1157 2, /* scalar load_cost. */
1158 2, /* scalar_store_cost. */
1159 5, /* vec_stmt_cost. */
1160 0, /* vec_to_scalar_cost. */
1161 2, /* scalar_to_vec_cost. */
1162 2, /* vec_align_load_cost. */
1163 3, /* vec_unalign_load_cost. */
1164 3, /* vec_store_cost. */
1165 3, /* cond_taken_branch_cost. */
1166 2, /* cond_not_taken_branch_cost. */
1169 struct processor_costs amdfam10_cost
= {
1170 COSTS_N_INSNS (1), /* cost of an add instruction */
1171 COSTS_N_INSNS (2), /* cost of a lea instruction */
1172 COSTS_N_INSNS (1), /* variable shift costs */
1173 COSTS_N_INSNS (1), /* constant shift costs */
1174 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1175 COSTS_N_INSNS (4), /* HI */
1176 COSTS_N_INSNS (3), /* SI */
1177 COSTS_N_INSNS (4), /* DI */
1178 COSTS_N_INSNS (5)}, /* other */
1179 0, /* cost of multiply per each bit set */
1180 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1181 COSTS_N_INSNS (35), /* HI */
1182 COSTS_N_INSNS (51), /* SI */
1183 COSTS_N_INSNS (83), /* DI */
1184 COSTS_N_INSNS (83)}, /* other */
1185 COSTS_N_INSNS (1), /* cost of movsx */
1186 COSTS_N_INSNS (1), /* cost of movzx */
1187 8, /* "large" insn */
1189 4, /* cost for loading QImode using movzbl */
1190 {3, 4, 3}, /* cost of loading integer registers
1191 in QImode, HImode and SImode.
1192 Relative to reg-reg move (2). */
1193 {3, 4, 3}, /* cost of storing integer registers */
1194 4, /* cost of reg,reg fld/fst */
1195 {4, 4, 12}, /* cost of loading fp registers
1196 in SFmode, DFmode and XFmode */
1197 {6, 6, 8}, /* cost of storing fp registers
1198 in SFmode, DFmode and XFmode */
1199 2, /* cost of moving MMX register */
1200 {3, 3}, /* cost of loading MMX registers
1201 in SImode and DImode */
1202 {4, 4}, /* cost of storing MMX registers
1203 in SImode and DImode */
1204 2, /* cost of moving SSE register */
1205 {4, 4, 3}, /* cost of loading SSE registers
1206 in SImode, DImode and TImode */
1207 {4, 4, 5}, /* cost of storing SSE registers
1208 in SImode, DImode and TImode */
1209 3, /* MMX or SSE register to integer */
1211 MOVD reg64, xmmreg Double FSTORE 4
1212 MOVD reg32, xmmreg Double FSTORE 4
1214 MOVD reg64, xmmreg Double FADD 3
1216 MOVD reg32, xmmreg Double FADD 3
1218 64, /* size of l1 cache. */
1219 512, /* size of l2 cache. */
1220 64, /* size of prefetch block */
1221 /* New AMD processors never drop prefetches; if they cannot be performed
1222 immediately, they are queued. We set number of simultaneous prefetches
1223 to a large constant to reflect this (it probably is not a good idea not
1224 to limit number of prefetches at all, as their execution also takes some
1226 100, /* number of parallel prefetches */
1227 2, /* Branch cost */
1228 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1229 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1230 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1231 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1232 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1233 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1235 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1236 very small blocks it is better to use loop. For large blocks, libcall can
1237 do nontemporary accesses and beat inline considerably. */
1238 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1239 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1240 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1241 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1242 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1243 4, /* scalar_stmt_cost. */
1244 2, /* scalar load_cost. */
1245 2, /* scalar_store_cost. */
1246 6, /* vec_stmt_cost. */
1247 0, /* vec_to_scalar_cost. */
1248 2, /* scalar_to_vec_cost. */
1249 2, /* vec_align_load_cost. */
1250 2, /* vec_unalign_load_cost. */
1251 2, /* vec_store_cost. */
1252 2, /* cond_taken_branch_cost. */
1253 1, /* cond_not_taken_branch_cost. */
1256 struct processor_costs bdver1_cost
= {
1257 COSTS_N_INSNS (1), /* cost of an add instruction */
1258 COSTS_N_INSNS (1), /* cost of a lea instruction */
1259 COSTS_N_INSNS (1), /* variable shift costs */
1260 COSTS_N_INSNS (1), /* constant shift costs */
1261 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1262 COSTS_N_INSNS (4), /* HI */
1263 COSTS_N_INSNS (4), /* SI */
1264 COSTS_N_INSNS (6), /* DI */
1265 COSTS_N_INSNS (6)}, /* other */
1266 0, /* cost of multiply per each bit set */
1267 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1268 COSTS_N_INSNS (35), /* HI */
1269 COSTS_N_INSNS (51), /* SI */
1270 COSTS_N_INSNS (83), /* DI */
1271 COSTS_N_INSNS (83)}, /* other */
1272 COSTS_N_INSNS (1), /* cost of movsx */
1273 COSTS_N_INSNS (1), /* cost of movzx */
1274 8, /* "large" insn */
1276 4, /* cost for loading QImode using movzbl */
1277 {5, 5, 4}, /* cost of loading integer registers
1278 in QImode, HImode and SImode.
1279 Relative to reg-reg move (2). */
1280 {4, 4, 4}, /* cost of storing integer registers */
1281 2, /* cost of reg,reg fld/fst */
1282 {5, 5, 12}, /* cost of loading fp registers
1283 in SFmode, DFmode and XFmode */
1284 {4, 4, 8}, /* cost of storing fp registers
1285 in SFmode, DFmode and XFmode */
1286 2, /* cost of moving MMX register */
1287 {4, 4}, /* cost of loading MMX registers
1288 in SImode and DImode */
1289 {4, 4}, /* cost of storing MMX registers
1290 in SImode and DImode */
1291 2, /* cost of moving SSE register */
1292 {4, 4, 4}, /* cost of loading SSE registers
1293 in SImode, DImode and TImode */
1294 {4, 4, 4}, /* cost of storing SSE registers
1295 in SImode, DImode and TImode */
1296 2, /* MMX or SSE register to integer */
1298 MOVD reg64, xmmreg Double FSTORE 4
1299 MOVD reg32, xmmreg Double FSTORE 4
1301 MOVD reg64, xmmreg Double FADD 3
1303 MOVD reg32, xmmreg Double FADD 3
1305 16, /* size of l1 cache. */
1306 2048, /* size of l2 cache. */
1307 64, /* size of prefetch block */
1308 /* New AMD processors never drop prefetches; if they cannot be performed
1309 immediately, they are queued. We set number of simultaneous prefetches
1310 to a large constant to reflect this (it probably is not a good idea not
1311 to limit number of prefetches at all, as their execution also takes some
1313 100, /* number of parallel prefetches */
1314 2, /* Branch cost */
1315 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1316 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1317 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1318 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1319 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1320 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1322 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1323 very small blocks it is better to use loop. For large blocks, libcall
1324 can do nontemporary accesses and beat inline considerably. */
1325 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1326 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1327 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1328 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1329 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1330 6, /* scalar_stmt_cost. */
1331 4, /* scalar load_cost. */
1332 4, /* scalar_store_cost. */
1333 6, /* vec_stmt_cost. */
1334 0, /* vec_to_scalar_cost. */
1335 2, /* scalar_to_vec_cost. */
1336 4, /* vec_align_load_cost. */
1337 4, /* vec_unalign_load_cost. */
1338 4, /* vec_store_cost. */
1339 2, /* cond_taken_branch_cost. */
1340 1, /* cond_not_taken_branch_cost. */
1343 struct processor_costs bdver2_cost
= {
1344 COSTS_N_INSNS (1), /* cost of an add instruction */
1345 COSTS_N_INSNS (1), /* cost of a lea instruction */
1346 COSTS_N_INSNS (1), /* variable shift costs */
1347 COSTS_N_INSNS (1), /* constant shift costs */
1348 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1349 COSTS_N_INSNS (4), /* HI */
1350 COSTS_N_INSNS (4), /* SI */
1351 COSTS_N_INSNS (6), /* DI */
1352 COSTS_N_INSNS (6)}, /* other */
1353 0, /* cost of multiply per each bit set */
1354 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1355 COSTS_N_INSNS (35), /* HI */
1356 COSTS_N_INSNS (51), /* SI */
1357 COSTS_N_INSNS (83), /* DI */
1358 COSTS_N_INSNS (83)}, /* other */
1359 COSTS_N_INSNS (1), /* cost of movsx */
1360 COSTS_N_INSNS (1), /* cost of movzx */
1361 8, /* "large" insn */
1363 4, /* cost for loading QImode using movzbl */
1364 {5, 5, 4}, /* cost of loading integer registers
1365 in QImode, HImode and SImode.
1366 Relative to reg-reg move (2). */
1367 {4, 4, 4}, /* cost of storing integer registers */
1368 2, /* cost of reg,reg fld/fst */
1369 {5, 5, 12}, /* cost of loading fp registers
1370 in SFmode, DFmode and XFmode */
1371 {4, 4, 8}, /* cost of storing fp registers
1372 in SFmode, DFmode and XFmode */
1373 2, /* cost of moving MMX register */
1374 {4, 4}, /* cost of loading MMX registers
1375 in SImode and DImode */
1376 {4, 4}, /* cost of storing MMX registers
1377 in SImode and DImode */
1378 2, /* cost of moving SSE register */
1379 {4, 4, 4}, /* cost of loading SSE registers
1380 in SImode, DImode and TImode */
1381 {4, 4, 4}, /* cost of storing SSE registers
1382 in SImode, DImode and TImode */
1383 2, /* MMX or SSE register to integer */
1385 MOVD reg64, xmmreg Double FSTORE 4
1386 MOVD reg32, xmmreg Double FSTORE 4
1388 MOVD reg64, xmmreg Double FADD 3
1390 MOVD reg32, xmmreg Double FADD 3
1392 16, /* size of l1 cache. */
1393 2048, /* size of l2 cache. */
1394 64, /* size of prefetch block */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued. We set number of simultaneous prefetches
1397 to a large constant to reflect this (it probably is not a good idea not
1398 to limit number of prefetches at all, as their execution also takes some
1400 100, /* number of parallel prefetches */
1401 2, /* Branch cost */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1409 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1410 very small blocks it is better to use loop. For large blocks, libcall
1411 can do nontemporary accesses and beat inline considerably. */
1412 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1413 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1414 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1415 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1416 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1417 6, /* scalar_stmt_cost. */
1418 4, /* scalar load_cost. */
1419 4, /* scalar_store_cost. */
1420 6, /* vec_stmt_cost. */
1421 0, /* vec_to_scalar_cost. */
1422 2, /* scalar_to_vec_cost. */
1423 4, /* vec_align_load_cost. */
1424 4, /* vec_unalign_load_cost. */
1425 4, /* vec_store_cost. */
1426 2, /* cond_taken_branch_cost. */
1427 1, /* cond_not_taken_branch_cost. */
1430 struct processor_costs btver1_cost
= {
1431 COSTS_N_INSNS (1), /* cost of an add instruction */
1432 COSTS_N_INSNS (2), /* cost of a lea instruction */
1433 COSTS_N_INSNS (1), /* variable shift costs */
1434 COSTS_N_INSNS (1), /* constant shift costs */
1435 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1436 COSTS_N_INSNS (4), /* HI */
1437 COSTS_N_INSNS (3), /* SI */
1438 COSTS_N_INSNS (4), /* DI */
1439 COSTS_N_INSNS (5)}, /* other */
1440 0, /* cost of multiply per each bit set */
1441 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1442 COSTS_N_INSNS (35), /* HI */
1443 COSTS_N_INSNS (51), /* SI */
1444 COSTS_N_INSNS (83), /* DI */
1445 COSTS_N_INSNS (83)}, /* other */
1446 COSTS_N_INSNS (1), /* cost of movsx */
1447 COSTS_N_INSNS (1), /* cost of movzx */
1448 8, /* "large" insn */
1450 4, /* cost for loading QImode using movzbl */
1451 {3, 4, 3}, /* cost of loading integer registers
1452 in QImode, HImode and SImode.
1453 Relative to reg-reg move (2). */
1454 {3, 4, 3}, /* cost of storing integer registers */
1455 4, /* cost of reg,reg fld/fst */
1456 {4, 4, 12}, /* cost of loading fp registers
1457 in SFmode, DFmode and XFmode */
1458 {6, 6, 8}, /* cost of storing fp registers
1459 in SFmode, DFmode and XFmode */
1460 2, /* cost of moving MMX register */
1461 {3, 3}, /* cost of loading MMX registers
1462 in SImode and DImode */
1463 {4, 4}, /* cost of storing MMX registers
1464 in SImode and DImode */
1465 2, /* cost of moving SSE register */
1466 {4, 4, 3}, /* cost of loading SSE registers
1467 in SImode, DImode and TImode */
1468 {4, 4, 5}, /* cost of storing SSE registers
1469 in SImode, DImode and TImode */
1470 3, /* MMX or SSE register to integer */
1472 MOVD reg64, xmmreg Double FSTORE 4
1473 MOVD reg32, xmmreg Double FSTORE 4
1475 MOVD reg64, xmmreg Double FADD 3
1477 MOVD reg32, xmmreg Double FADD 3
1479 32, /* size of l1 cache. */
1480 512, /* size of l2 cache. */
1481 64, /* size of prefetch block */
1482 100, /* number of parallel prefetches */
1483 2, /* Branch cost */
1484 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1485 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1486 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1487 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1488 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1489 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1491 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1492 very small blocks it is better to use loop. For large blocks, libcall can
1493 do nontemporary accesses and beat inline considerably. */
1494 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1495 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1496 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1497 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1498 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1499 4, /* scalar_stmt_cost. */
1500 2, /* scalar load_cost. */
1501 2, /* scalar_store_cost. */
1502 6, /* vec_stmt_cost. */
1503 0, /* vec_to_scalar_cost. */
1504 2, /* scalar_to_vec_cost. */
1505 2, /* vec_align_load_cost. */
1506 2, /* vec_unalign_load_cost. */
1507 2, /* vec_store_cost. */
1508 2, /* cond_taken_branch_cost. */
1509 1, /* cond_not_taken_branch_cost. */
1512 struct processor_costs btver2_cost
= {
1513 COSTS_N_INSNS (1), /* cost of an add instruction */
1514 COSTS_N_INSNS (2), /* cost of a lea instruction */
1515 COSTS_N_INSNS (1), /* variable shift costs */
1516 COSTS_N_INSNS (1), /* constant shift costs */
1517 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1518 COSTS_N_INSNS (4), /* HI */
1519 COSTS_N_INSNS (3), /* SI */
1520 COSTS_N_INSNS (4), /* DI */
1521 COSTS_N_INSNS (5)}, /* other */
1522 0, /* cost of multiply per each bit set */
1523 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1524 COSTS_N_INSNS (35), /* HI */
1525 COSTS_N_INSNS (51), /* SI */
1526 COSTS_N_INSNS (83), /* DI */
1527 COSTS_N_INSNS (83)}, /* other */
1528 COSTS_N_INSNS (1), /* cost of movsx */
1529 COSTS_N_INSNS (1), /* cost of movzx */
1530 8, /* "large" insn */
1532 4, /* cost for loading QImode using movzbl */
1533 {3, 4, 3}, /* cost of loading integer registers
1534 in QImode, HImode and SImode.
1535 Relative to reg-reg move (2). */
1536 {3, 4, 3}, /* cost of storing integer registers */
1537 4, /* cost of reg,reg fld/fst */
1538 {4, 4, 12}, /* cost of loading fp registers
1539 in SFmode, DFmode and XFmode */
1540 {6, 6, 8}, /* cost of storing fp registers
1541 in SFmode, DFmode and XFmode */
1542 2, /* cost of moving MMX register */
1543 {3, 3}, /* cost of loading MMX registers
1544 in SImode and DImode */
1545 {4, 4}, /* cost of storing MMX registers
1546 in SImode and DImode */
1547 2, /* cost of moving SSE register */
1548 {4, 4, 3}, /* cost of loading SSE registers
1549 in SImode, DImode and TImode */
1550 {4, 4, 5}, /* cost of storing SSE registers
1551 in SImode, DImode and TImode */
1552 3, /* MMX or SSE register to integer */
1554 MOVD reg64, xmmreg Double FSTORE 4
1555 MOVD reg32, xmmreg Double FSTORE 4
1557 MOVD reg64, xmmreg Double FADD 3
1559 MOVD reg32, xmmreg Double FADD 3
1561 32, /* size of l1 cache. */
1562 2048, /* size of l2 cache. */
1563 64, /* size of prefetch block */
1564 100, /* number of parallel prefetches */
1565 2, /* Branch cost */
1566 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1567 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1568 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1569 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1570 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1571 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1573 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1574 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1575 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1576 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1577 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1578 4, /* scalar_stmt_cost. */
1579 2, /* scalar load_cost. */
1580 2, /* scalar_store_cost. */
1581 6, /* vec_stmt_cost. */
1582 0, /* vec_to_scalar_cost. */
1583 2, /* scalar_to_vec_cost. */
1584 2, /* vec_align_load_cost. */
1585 2, /* vec_unalign_load_cost. */
1586 2, /* vec_store_cost. */
1587 2, /* cond_taken_branch_cost. */
1588 1, /* cond_not_taken_branch_cost. */
1592 struct processor_costs pentium4_cost
= {
1593 COSTS_N_INSNS (1), /* cost of an add instruction */
1594 COSTS_N_INSNS (3), /* cost of a lea instruction */
1595 COSTS_N_INSNS (4), /* variable shift costs */
1596 COSTS_N_INSNS (4), /* constant shift costs */
1597 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1598 COSTS_N_INSNS (15), /* HI */
1599 COSTS_N_INSNS (15), /* SI */
1600 COSTS_N_INSNS (15), /* DI */
1601 COSTS_N_INSNS (15)}, /* other */
1602 0, /* cost of multiply per each bit set */
1603 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1604 COSTS_N_INSNS (56), /* HI */
1605 COSTS_N_INSNS (56), /* SI */
1606 COSTS_N_INSNS (56), /* DI */
1607 COSTS_N_INSNS (56)}, /* other */
1608 COSTS_N_INSNS (1), /* cost of movsx */
1609 COSTS_N_INSNS (1), /* cost of movzx */
1610 16, /* "large" insn */
1612 2, /* cost for loading QImode using movzbl */
1613 {4, 5, 4}, /* cost of loading integer registers
1614 in QImode, HImode and SImode.
1615 Relative to reg-reg move (2). */
1616 {2, 3, 2}, /* cost of storing integer registers */
1617 2, /* cost of reg,reg fld/fst */
1618 {2, 2, 6}, /* cost of loading fp registers
1619 in SFmode, DFmode and XFmode */
1620 {4, 4, 6}, /* cost of storing fp registers
1621 in SFmode, DFmode and XFmode */
1622 2, /* cost of moving MMX register */
1623 {2, 2}, /* cost of loading MMX registers
1624 in SImode and DImode */
1625 {2, 2}, /* cost of storing MMX registers
1626 in SImode and DImode */
1627 12, /* cost of moving SSE register */
1628 {12, 12, 12}, /* cost of loading SSE registers
1629 in SImode, DImode and TImode */
1630 {2, 2, 8}, /* cost of storing SSE registers
1631 in SImode, DImode and TImode */
1632 10, /* MMX or SSE register to integer */
1633 8, /* size of l1 cache. */
1634 256, /* size of l2 cache. */
1635 64, /* size of prefetch block */
1636 6, /* number of parallel prefetches */
1637 2, /* Branch cost */
1638 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1639 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1640 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1641 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1642 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1643 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1644 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1645 DUMMY_STRINGOP_ALGS
},
1646 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1648 DUMMY_STRINGOP_ALGS
},
1649 1, /* scalar_stmt_cost. */
1650 1, /* scalar load_cost. */
1651 1, /* scalar_store_cost. */
1652 1, /* vec_stmt_cost. */
1653 1, /* vec_to_scalar_cost. */
1654 1, /* scalar_to_vec_cost. */
1655 1, /* vec_align_load_cost. */
1656 2, /* vec_unalign_load_cost. */
1657 1, /* vec_store_cost. */
1658 3, /* cond_taken_branch_cost. */
1659 1, /* cond_not_taken_branch_cost. */
1663 struct processor_costs nocona_cost
= {
1664 COSTS_N_INSNS (1), /* cost of an add instruction */
1665 COSTS_N_INSNS (1), /* cost of a lea instruction */
1666 COSTS_N_INSNS (1), /* variable shift costs */
1667 COSTS_N_INSNS (1), /* constant shift costs */
1668 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1669 COSTS_N_INSNS (10), /* HI */
1670 COSTS_N_INSNS (10), /* SI */
1671 COSTS_N_INSNS (10), /* DI */
1672 COSTS_N_INSNS (10)}, /* other */
1673 0, /* cost of multiply per each bit set */
1674 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1675 COSTS_N_INSNS (66), /* HI */
1676 COSTS_N_INSNS (66), /* SI */
1677 COSTS_N_INSNS (66), /* DI */
1678 COSTS_N_INSNS (66)}, /* other */
1679 COSTS_N_INSNS (1), /* cost of movsx */
1680 COSTS_N_INSNS (1), /* cost of movzx */
1681 16, /* "large" insn */
1682 17, /* MOVE_RATIO */
1683 4, /* cost for loading QImode using movzbl */
1684 {4, 4, 4}, /* cost of loading integer registers
1685 in QImode, HImode and SImode.
1686 Relative to reg-reg move (2). */
1687 {4, 4, 4}, /* cost of storing integer registers */
1688 3, /* cost of reg,reg fld/fst */
1689 {12, 12, 12}, /* cost of loading fp registers
1690 in SFmode, DFmode and XFmode */
1691 {4, 4, 4}, /* cost of storing fp registers
1692 in SFmode, DFmode and XFmode */
1693 6, /* cost of moving MMX register */
1694 {12, 12}, /* cost of loading MMX registers
1695 in SImode and DImode */
1696 {12, 12}, /* cost of storing MMX registers
1697 in SImode and DImode */
1698 6, /* cost of moving SSE register */
1699 {12, 12, 12}, /* cost of loading SSE registers
1700 in SImode, DImode and TImode */
1701 {12, 12, 12}, /* cost of storing SSE registers
1702 in SImode, DImode and TImode */
1703 8, /* MMX or SSE register to integer */
1704 8, /* size of l1 cache. */
1705 1024, /* size of l2 cache. */
1706 128, /* size of prefetch block */
1707 8, /* number of parallel prefetches */
1708 1, /* Branch cost */
1709 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1711 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1714 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1715 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1716 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1717 {100000, unrolled_loop
}, {-1, libcall
}}}},
1718 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1720 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1721 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1722 1, /* scalar_stmt_cost. */
1723 1, /* scalar load_cost. */
1724 1, /* scalar_store_cost. */
1725 1, /* vec_stmt_cost. */
1726 1, /* vec_to_scalar_cost. */
1727 1, /* scalar_to_vec_cost. */
1728 1, /* vec_align_load_cost. */
1729 2, /* vec_unalign_load_cost. */
1730 1, /* vec_store_cost. */
1731 3, /* cond_taken_branch_cost. */
1732 1, /* cond_not_taken_branch_cost. */
1736 struct processor_costs atom_cost
= {
1737 COSTS_N_INSNS (1), /* cost of an add instruction */
1738 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1739 COSTS_N_INSNS (1), /* variable shift costs */
1740 COSTS_N_INSNS (1), /* constant shift costs */
1741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1742 COSTS_N_INSNS (4), /* HI */
1743 COSTS_N_INSNS (3), /* SI */
1744 COSTS_N_INSNS (4), /* DI */
1745 COSTS_N_INSNS (2)}, /* other */
1746 0, /* cost of multiply per each bit set */
1747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1748 COSTS_N_INSNS (26), /* HI */
1749 COSTS_N_INSNS (42), /* SI */
1750 COSTS_N_INSNS (74), /* DI */
1751 COSTS_N_INSNS (74)}, /* other */
1752 COSTS_N_INSNS (1), /* cost of movsx */
1753 COSTS_N_INSNS (1), /* cost of movzx */
1754 8, /* "large" insn */
1755 17, /* MOVE_RATIO */
1756 4, /* cost for loading QImode using movzbl */
1757 {4, 4, 4}, /* cost of loading integer registers
1758 in QImode, HImode and SImode.
1759 Relative to reg-reg move (2). */
1760 {4, 4, 4}, /* cost of storing integer registers */
1761 4, /* cost of reg,reg fld/fst */
1762 {12, 12, 12}, /* cost of loading fp registers
1763 in SFmode, DFmode and XFmode */
1764 {6, 6, 8}, /* cost of storing fp registers
1765 in SFmode, DFmode and XFmode */
1766 2, /* cost of moving MMX register */
1767 {8, 8}, /* cost of loading MMX registers
1768 in SImode and DImode */
1769 {8, 8}, /* cost of storing MMX registers
1770 in SImode and DImode */
1771 2, /* cost of moving SSE register */
1772 {8, 8, 8}, /* cost of loading SSE registers
1773 in SImode, DImode and TImode */
1774 {8, 8, 8}, /* cost of storing SSE registers
1775 in SImode, DImode and TImode */
1776 5, /* MMX or SSE register to integer */
1777 32, /* size of l1 cache. */
1778 256, /* size of l2 cache. */
1779 64, /* size of prefetch block */
1780 6, /* number of parallel prefetches */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1789 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1790 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1791 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1792 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1793 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1794 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1795 1, /* scalar_stmt_cost. */
1796 1, /* scalar load_cost. */
1797 1, /* scalar_store_cost. */
1798 1, /* vec_stmt_cost. */
1799 1, /* vec_to_scalar_cost. */
1800 1, /* scalar_to_vec_cost. */
1801 1, /* vec_align_load_cost. */
1802 2, /* vec_unalign_load_cost. */
1803 1, /* vec_store_cost. */
1804 3, /* cond_taken_branch_cost. */
1805 1, /* cond_not_taken_branch_cost. */
1808 /* Generic64 should produce code tuned for Nocona and K8. */
1810 struct processor_costs generic64_cost
= {
1811 COSTS_N_INSNS (1), /* cost of an add instruction */
1812 /* On all chips taken into consideration lea is 2 cycles and more. With
1813 this cost however our current implementation of synth_mult results in
1814 use of unnecessary temporary registers causing regression on several
1815 SPECfp benchmarks. */
1816 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1817 COSTS_N_INSNS (1), /* variable shift costs */
1818 COSTS_N_INSNS (1), /* constant shift costs */
1819 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1820 COSTS_N_INSNS (4), /* HI */
1821 COSTS_N_INSNS (3), /* SI */
1822 COSTS_N_INSNS (4), /* DI */
1823 COSTS_N_INSNS (2)}, /* other */
1824 0, /* cost of multiply per each bit set */
1825 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1826 COSTS_N_INSNS (26), /* HI */
1827 COSTS_N_INSNS (42), /* SI */
1828 COSTS_N_INSNS (74), /* DI */
1829 COSTS_N_INSNS (74)}, /* other */
1830 COSTS_N_INSNS (1), /* cost of movsx */
1831 COSTS_N_INSNS (1), /* cost of movzx */
1832 8, /* "large" insn */
1833 17, /* MOVE_RATIO */
1834 4, /* cost for loading QImode using movzbl */
1835 {4, 4, 4}, /* cost of loading integer registers
1836 in QImode, HImode and SImode.
1837 Relative to reg-reg move (2). */
1838 {4, 4, 4}, /* cost of storing integer registers */
1839 4, /* cost of reg,reg fld/fst */
1840 {12, 12, 12}, /* cost of loading fp registers
1841 in SFmode, DFmode and XFmode */
1842 {6, 6, 8}, /* cost of storing fp registers
1843 in SFmode, DFmode and XFmode */
1844 2, /* cost of moving MMX register */
1845 {8, 8}, /* cost of loading MMX registers
1846 in SImode and DImode */
1847 {8, 8}, /* cost of storing MMX registers
1848 in SImode and DImode */
1849 2, /* cost of moving SSE register */
1850 {8, 8, 8}, /* cost of loading SSE registers
1851 in SImode, DImode and TImode */
1852 {8, 8, 8}, /* cost of storing SSE registers
1853 in SImode, DImode and TImode */
1854 5, /* MMX or SSE register to integer */
1855 32, /* size of l1 cache. */
1856 512, /* size of l2 cache. */
1857 64, /* size of prefetch block */
1858 6, /* number of parallel prefetches */
1859 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1860 value is increased to perhaps more appropriate value of 5. */
1861 3, /* Branch cost */
1862 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1863 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1864 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1865 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1866 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1867 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1868 {DUMMY_STRINGOP_ALGS
,
1869 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1870 {DUMMY_STRINGOP_ALGS
,
1871 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1872 1, /* scalar_stmt_cost. */
1873 1, /* scalar load_cost. */
1874 1, /* scalar_store_cost. */
1875 1, /* vec_stmt_cost. */
1876 1, /* vec_to_scalar_cost. */
1877 1, /* scalar_to_vec_cost. */
1878 1, /* vec_align_load_cost. */
1879 2, /* vec_unalign_load_cost. */
1880 1, /* vec_store_cost. */
1881 3, /* cond_taken_branch_cost. */
1882 1, /* cond_not_taken_branch_cost. */
1885 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1888 struct processor_costs generic32_cost
= {
1889 COSTS_N_INSNS (1), /* cost of an add instruction */
1890 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1891 COSTS_N_INSNS (1), /* variable shift costs */
1892 COSTS_N_INSNS (1), /* constant shift costs */
1893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1894 COSTS_N_INSNS (4), /* HI */
1895 COSTS_N_INSNS (3), /* SI */
1896 COSTS_N_INSNS (4), /* DI */
1897 COSTS_N_INSNS (2)}, /* other */
1898 0, /* cost of multiply per each bit set */
1899 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1900 COSTS_N_INSNS (26), /* HI */
1901 COSTS_N_INSNS (42), /* SI */
1902 COSTS_N_INSNS (74), /* DI */
1903 COSTS_N_INSNS (74)}, /* other */
1904 COSTS_N_INSNS (1), /* cost of movsx */
1905 COSTS_N_INSNS (1), /* cost of movzx */
1906 8, /* "large" insn */
1907 17, /* MOVE_RATIO */
1908 4, /* cost for loading QImode using movzbl */
1909 {4, 4, 4}, /* cost of loading integer registers
1910 in QImode, HImode and SImode.
1911 Relative to reg-reg move (2). */
1912 {4, 4, 4}, /* cost of storing integer registers */
1913 4, /* cost of reg,reg fld/fst */
1914 {12, 12, 12}, /* cost of loading fp registers
1915 in SFmode, DFmode and XFmode */
1916 {6, 6, 8}, /* cost of storing fp registers
1917 in SFmode, DFmode and XFmode */
1918 2, /* cost of moving MMX register */
1919 {8, 8}, /* cost of loading MMX registers
1920 in SImode and DImode */
1921 {8, 8}, /* cost of storing MMX registers
1922 in SImode and DImode */
1923 2, /* cost of moving SSE register */
1924 {8, 8, 8}, /* cost of loading SSE registers
1925 in SImode, DImode and TImode */
1926 {8, 8, 8}, /* cost of storing SSE registers
1927 in SImode, DImode and TImode */
1928 5, /* MMX or SSE register to integer */
1929 32, /* size of l1 cache. */
1930 256, /* size of l2 cache. */
1931 64, /* size of prefetch block */
1932 6, /* number of parallel prefetches */
1933 3, /* Branch cost */
1934 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1935 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1936 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1937 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1938 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1939 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1940 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1941 DUMMY_STRINGOP_ALGS
},
1942 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1943 DUMMY_STRINGOP_ALGS
},
1944 1, /* scalar_stmt_cost. */
1945 1, /* scalar load_cost. */
1946 1, /* scalar_store_cost. */
1947 1, /* vec_stmt_cost. */
1948 1, /* vec_to_scalar_cost. */
1949 1, /* scalar_to_vec_cost. */
1950 1, /* vec_align_load_cost. */
1951 2, /* vec_unalign_load_cost. */
1952 1, /* vec_store_cost. */
1953 3, /* cond_taken_branch_cost. */
1954 1, /* cond_not_taken_branch_cost. */
1957 /* Set by -mtune. */
1958 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1960 /* Set by -mtune or -Os. */
1961 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_FOO is the bit for a
   single PROCESSOR_FOO enumerator; the compound masks OR related CPUs
   together for use in the tuning tables below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
/* m_BTVER1/m_BTVER2 are defined before the m_BTVER union so the group
   reads top-down like the m_BDVER group above.  (The previous order,
   m_BTVER first, only worked because macro expansion is deferred to the
   use site.)  */
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
2003 /* Feature tests against the various tunings. */
2004 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
2006 /* Feature tests against the various tunings used to create ix86_tune_features
2007 based on the processor mask. */
2008 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
2009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
2010 negatively, so enabling for Generic64 seems like good code size
2011 tradeoff. We can't enable it for 32bit generic because it does not
2012 work well with PPro base chips. */
2013 m_386
| m_CORE2I7_64
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
2015 /* X86_TUNE_PUSH_MEMORY */
2016 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2018 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
2021 /* X86_TUNE_UNROLL_STRLEN */
2022 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
2024 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
2025 on simulation result. But after P4 was made, no performance benefit
2026 was observed with branch hints. It also increases the code size.
2027 As a result, icc never generates branch hints. */
2030 /* X86_TUNE_DOUBLE_WITH_ADD */
2033 /* X86_TUNE_USE_SAHF */
2034 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
2036 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
2037 partial dependencies. */
2038 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2040 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
2041 register stalls on Generic32 compilation setting as well. However
2042 in current implementation the partial register stalls are not eliminated
2043 very well - they can be introduced via subregs synthesized by combine
2044 and can happen in caller/callee saving sequences. Because this option
2045 pays back little on PPro based chips and is in conflict with partial reg
2046 dependencies used by Athlon/P4 based chips, it is better to leave it off
2047 for generic32 for now. */
2050 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
2051 m_CORE2I7
| m_GENERIC
,
2053 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
2054 * on 16-bit immediate moves into memory on Core2 and Corei7. */
2055 m_CORE2I7
| m_GENERIC
,
2057 /* X86_TUNE_USE_HIMODE_FIOP */
2058 m_386
| m_486
| m_K6_GEODE
,
2060 /* X86_TUNE_USE_SIMODE_FIOP */
2061 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
2063 /* X86_TUNE_USE_MOV0 */
2066 /* X86_TUNE_USE_CLTD */
2067 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
2069 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2072 /* X86_TUNE_SPLIT_LONG_MOVES */
2075 /* X86_TUNE_READ_MODIFY_WRITE */
2078 /* X86_TUNE_READ_MODIFY */
2081 /* X86_TUNE_PROMOTE_QIMODE */
2082 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2084 /* X86_TUNE_FAST_PREFIX */
2085 ~(m_386
| m_486
| m_PENT
),
2087 /* X86_TUNE_SINGLE_STRINGOP */
2088 m_386
| m_P4_NOCONA
,
2090 /* X86_TUNE_QIMODE_MATH */
2093 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2094 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2095 might be considered for Generic32 if our scheme for avoiding partial
2096 stalls was more effective. */
2099 /* X86_TUNE_PROMOTE_QI_REGS */
2102 /* X86_TUNE_PROMOTE_HI_REGS */
2105 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2106 over esp addition. */
2107 m_386
| m_486
| m_PENT
| m_PPRO
,
2109 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2110 over esp addition. */
2113 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2114 over esp subtraction. */
2115 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2117 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2118 over esp subtraction. */
2119 m_PENT
| m_K6_GEODE
,
2121 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2122 for DFmode copies */
2123 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
2125 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2126 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2128 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2129 conflict here in between PPro/Pentium4 based chips that thread 128bit
2130 SSE registers as single units versus K8 based chips that divide SSE
2131 registers to two 64bit halves. This knob promotes all store destinations
2132 to be 128bit to allow register renaming on 128bit SSE units, but usually
2133 results in one extra microop on 64bit SSE units. Experimental results
2134 shows that disabling this option on P4 brings over 20% SPECfp regression,
2135 while enabling it on K8 brings roughly 2.4% regression that can be partly
2136 masked by careful scheduling of moves. */
2137 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2139 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2140 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
2142 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2145 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2149 are resolved on SSE register parts instead of whole registers, so we may
2150 maintain just lower part of scalar values in proper format leaving the
2151 upper part undefined. */
2154 /* X86_TUNE_SSE_TYPELESS_STORES */
2157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2158 m_PPRO
| m_P4_NOCONA
,
2160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2161 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2163 /* X86_TUNE_PROLOGUE_USING_MOVE */
2164 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2166 /* X86_TUNE_EPILOGUE_USING_MOVE */
2167 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2169 /* X86_TUNE_SHIFT1 */
2172 /* X86_TUNE_USE_FFREEP */
2175 /* X86_TUNE_INTER_UNIT_MOVES */
2176 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2178 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2179 ~(m_AMDFAM10
| m_BDVER
),
2181 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2182 than 4 branch instructions in the 16 byte window. */
2183 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2185 /* X86_TUNE_SCHEDULE */
2186 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2188 /* X86_TUNE_USE_BT */
2189 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2191 /* X86_TUNE_USE_INCDEC */
2192 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
2194 /* X86_TUNE_PAD_RETURNS */
2195 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
2197 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
2200 /* X86_TUNE_EXT_80387_CONSTANTS */
2201 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2203 /* X86_TUNE_SHORTEN_X87_SSE */
2206 /* X86_TUNE_AVOID_VECTOR_DECODE */
2207 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
2209 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2210 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2213 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2214 vector path on AMD machines. */
2215 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2217 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2219 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2221 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2225 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2226 but one byte longer. */
2229 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2230 operand that cannot be represented using a modRM byte. The XOR
2231 replacement is long decoded, so this split helps here as well. */
2234 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2236 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
2238 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2239 from integer to FP. */
2242 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2243 with a subsequent conditional jump instruction into a single
2244 compare-and-branch uop. */
2247 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2248 will impact LEA instruction selection. */
2251 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2255 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2256 at -O3. For the moment, the prefetching seems badly tuned for Intel
2258 m_K6_GEODE
| m_AMD_MULTIPLE
,
2260 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2261 the auto-vectorizer. */
2264 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2265 during reassociation of integer computation. */
2268 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2269 during reassociation of fp computation. */
2273 /* Feature tests against the various architecture variations. */
2274 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2276 /* Feature tests against the various architecture variations, used to create
2277 ix86_arch_features based on the processor mask. */
2278 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2279 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2280 ~(m_386
| m_486
| m_PENT
| m_K6
),
2282 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2285 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2288 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2291 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2295 static const unsigned int x86_accumulate_outgoing_args
2296 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
2298 static const unsigned int x86_arch_always_fancy_math_387
2299 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
2301 static const unsigned int x86_avx256_split_unaligned_load
2302 = m_COREI7
| m_GENERIC
;
2304 static const unsigned int x86_avx256_split_unaligned_store
2305 = m_COREI7
| m_BDVER
| m_GENERIC
;
2307 /* In case the average insn count for single function invocation is
2308 lower than this constant, emit fast (but longer) prologue and
2310 #define FAST_PROLOGUE_INSN_COUNT 20
2312 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2313 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2314 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2315 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2317 /* Array of the smallest class containing reg number REGNO, indexed by
2318 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2320 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2322 /* ax, dx, cx, bx */
2323 AREG
, DREG
, CREG
, BREG
,
2324 /* si, di, bp, sp */
2325 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2327 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2328 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2331 /* flags, fpsr, fpcr, frame */
2332 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2334 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2337 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2340 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2341 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2342 /* SSE REX registers */
2343 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2347 /* The "default" register map used in 32bit mode. */
2349 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2351 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2352 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2353 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2354 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2355 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2356 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2357 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2360 /* The "default" register map used in 64bit mode. */
2362 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2364 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2365 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2366 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2367 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2368 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2369 8,9,10,11,12,13,14,15, /* extended integer registers */
2370 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2373 /* Define the register numbers to be used in Dwarf debugging information.
2374 The SVR4 reference port C compiler uses the following register numbers
2375 in its Dwarf output code:
2376 0 for %eax (gcc regno = 0)
2377 1 for %ecx (gcc regno = 2)
2378 2 for %edx (gcc regno = 1)
2379 3 for %ebx (gcc regno = 3)
2380 4 for %esp (gcc regno = 7)
2381 5 for %ebp (gcc regno = 6)
2382 6 for %esi (gcc regno = 4)
2383 7 for %edi (gcc regno = 5)
2384 The following three DWARF register numbers are never generated by
2385 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2386 believes these numbers have these meanings.
2387 8 for %eip (no gcc equivalent)
2388 9 for %eflags (gcc regno = 17)
2389 10 for %trapno (no gcc equivalent)
2390 It is not at all clear how we should number the FP stack registers
2391 for the x86 architecture. If the version of SDB on x86/svr4 were
2392 a bit less brain dead with respect to floating-point then we would
2393 have a precedent to follow with respect to DWARF register numbers
2394 for x86 FP registers, but the SDB on x86/svr4 is so completely
2395 broken with respect to FP registers that it is hardly worth thinking
2396 of it as something to strive for compatibility with.
2397 The version of x86/svr4 SDB I have at the moment does (partially)
2398 seem to believe that DWARF register number 11 is associated with
2399 the x86 register %st(0), but that's about all. Higher DWARF
2400 register numbers don't seem to be associated with anything in
2401 particular, and even for DWARF regno 11, SDB only seems to under-
2402 stand that it should say that a variable lives in %st(0) (when
2403 asked via an `=' command) if we said it was in DWARF regno 11,
2404 but SDB still prints garbage when asked for the value of the
2405 variable in question (via a `/' command).
2406 (Also note that the labels SDB prints for various FP stack regs
2407 when doing an `x' command are all wrong.)
2408 Note that these problems generally don't affect the native SVR4
2409 C compiler because it doesn't allow the use of -O with -g and
2410 because when it is *not* optimizing, it allocates a memory
2411 location for each floating-point variable, and the memory
2412 location is what gets described in the DWARF AT_location
2413 attribute for the variable in question.
2414 Regardless of the severe mental illness of the x86/svr4 SDB, we
2415 do something sensible here and we use the following DWARF
2416 register numbers. Note that these are all stack-top-relative
2418 11 for %st(0) (gcc regno = 8)
2419 12 for %st(1) (gcc regno = 9)
2420 13 for %st(2) (gcc regno = 10)
2421 14 for %st(3) (gcc regno = 11)
2422 15 for %st(4) (gcc regno = 12)
2423 16 for %st(5) (gcc regno = 13)
2424 17 for %st(6) (gcc regno = 14)
2425 18 for %st(7) (gcc regno = 15)
2427 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2429 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2430 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2431 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2432 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2433 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2434 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2435 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2438 /* Define parameter passing and return registers. */
2440 static int const x86_64_int_parameter_registers
[6] =
2442 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2445 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2447 CX_REG
, DX_REG
, R8_REG
, R9_REG
2450 static int const x86_64_int_return_registers
[4] =
2452 AX_REG
, DX_REG
, DI_REG
, SI_REG
2455 /* Define the structure for the machine field in struct function. */
2457 struct GTY(()) stack_local_entry
{
2458 unsigned short mode
;
2461 struct stack_local_entry
*next
;
2464 /* Structure describing stack frame layout.
2465 Stack grows downward:
2471 saved static chain if ix86_static_chain_on_stack
2473 saved frame pointer if frame_pointer_needed
2474 <- HARD_FRAME_POINTER
2480 <- sse_regs_save_offset
2483 [va_arg registers] |
2487 [padding2] | = to_allocate
2496 int outgoing_arguments_size
;
2498 /* The offsets relative to ARG_POINTER. */
2499 HOST_WIDE_INT frame_pointer_offset
;
2500 HOST_WIDE_INT hard_frame_pointer_offset
;
2501 HOST_WIDE_INT stack_pointer_offset
;
2502 HOST_WIDE_INT hfp_save_offset
;
2503 HOST_WIDE_INT reg_save_offset
;
2504 HOST_WIDE_INT sse_reg_save_offset
;
2506 /* When save_regs_using_mov is set, emit prologue using
2507 move instead of push instructions. */
2508 bool save_regs_using_mov
;
2511 /* Which cpu are we scheduling for. */
2512 enum attr_cpu ix86_schedule
;
2514 /* Which cpu are we optimizing for. */
2515 enum processor_type ix86_tune
;
2517 /* Which instruction set architecture to use. */
2518 enum processor_type ix86_arch
;
2520 /* true if sse prefetch instruction is not NOOP. */
2521 int x86_prefetch_sse
;
/* Attribute name backing the -mstackrealign option.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
2527 static rtx (*ix86_gen_leave
) (void);
2528 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2529 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2530 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2531 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2532 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2533 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2534 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2535 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2536 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2537 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2538 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;
2553 /* Calling abi specific va_list type nodes. */
2554 static GTY(()) tree sysv_va_list_type_node
;
2555 static GTY(()) tree ms_va_list_type_node
;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
2561 /* Fence to use after loop using movnt. */
2564 /* Register class used for passing given 64bit part of the argument.
2565 These represent classes as documented by the PS ABI, with the exception
2566 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2567 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2569 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2570 whenever possible (upper half does contain padding). */
2571 enum x86_64_reg_class
2574 X86_64_INTEGER_CLASS
,
2575 X86_64_INTEGERSI_CLASS
,
2582 X86_64_COMPLEX_X87_CLASS
,
2586 #define MAX_CLASSES 4
2588 /* Table of constants used by fldpi, fldln2, etc.... */
2589 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2590 static bool ext_80387_constants_init
= 0;
2593 static struct machine_function
* ix86_init_machine_status (void);
2594 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2595 static bool ix86_function_value_regno_p (const unsigned int);
2596 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2598 static rtx
ix86_static_chain (const_tree
, bool);
2599 static int ix86_function_regparm (const_tree
, const_tree
);
2600 static void ix86_compute_frame_layout (struct ix86_frame
*);
2601 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2603 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2604 static tree
ix86_canonical_va_list_type (tree
);
2605 static void predict_jump (int);
2606 static unsigned int split_stack_prologue_scratch_regno (void);
2607 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2609 enum ix86_function_specific_strings
2611 IX86_FUNCTION_SPECIFIC_ARCH
,
2612 IX86_FUNCTION_SPECIFIC_TUNE
,
2613 IX86_FUNCTION_SPECIFIC_MAX
2616 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2617 const char *, enum fpmath_unit
, bool);
2618 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2619 static void ix86_function_specific_save (struct cl_target_option
*);
2620 static void ix86_function_specific_restore (struct cl_target_option
*);
2621 static void ix86_function_specific_print (FILE *, int,
2622 struct cl_target_option
*);
2623 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2624 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2625 struct gcc_options
*);
2626 static bool ix86_can_inline_p (tree
, tree
);
2627 static void ix86_set_current_function (tree
);
2628 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2630 static enum calling_abi
ix86_function_abi (const_tree
);
2633 #ifndef SUBTARGET32_DEFAULT_CPU
2634 #define SUBTARGET32_DEFAULT_CPU "i386"
2637 /* The svr4 ABI for the i386 says that records and unions are returned
2639 #ifndef DEFAULT_PCC_STRUCT_RETURN
2640 #define DEFAULT_PCC_STRUCT_RETURN 1
/* Whether -mtune= or -march= were specified.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2647 /* Vectorization library interface and handlers. */
2648 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2650 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2651 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2653 /* Processor target table, indexed by processor number */
2656 const struct processor_costs
*cost
; /* Processor costs */
2657 const int align_loop
; /* Default alignments. */
2658 const int align_loop_max_skip
;
2659 const int align_jump
;
2660 const int align_jump_max_skip
;
2661 const int align_func
;
2664 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2666 {&i386_cost
, 4, 3, 4, 3, 4},
2667 {&i486_cost
, 16, 15, 16, 15, 16},
2668 {&pentium_cost
, 16, 7, 16, 7, 16},
2669 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2670 {&geode_cost
, 0, 0, 0, 0, 0},
2671 {&k6_cost
, 32, 7, 32, 7, 32},
2672 {&athlon_cost
, 16, 7, 16, 7, 16},
2673 {&pentium4_cost
, 0, 0, 0, 0, 0},
2674 {&k8_cost
, 16, 7, 16, 7, 16},
2675 {&nocona_cost
, 0, 0, 0, 0, 0},
2676 /* Core 2 32-bit. */
2677 {&generic32_cost
, 16, 10, 16, 10, 16},
2678 /* Core 2 64-bit. */
2679 {&generic64_cost
, 16, 10, 16, 10, 16},
2680 /* Core i7 32-bit. */
2681 {&generic32_cost
, 16, 10, 16, 10, 16},
2682 /* Core i7 64-bit. */
2683 {&generic64_cost
, 16, 10, 16, 10, 16},
2684 {&generic32_cost
, 16, 7, 16, 7, 16},
2685 {&generic64_cost
, 16, 10, 16, 10, 16},
2686 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2687 {&bdver1_cost
, 32, 24, 32, 7, 32},
2688 {&bdver2_cost
, 32, 24, 32, 7, 32},
2689 {&btver1_cost
, 32, 24, 32, 7, 32},
2690 {&btver2_cost
, 32, 24, 32, 7, 32},
2691 {&atom_cost
, 16, 15, 16, 7, 16}
2694 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2725 /* Return true if a red-zone is in use. */
2728 ix86_using_red_zone (void)
2730 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2733 /* Return a string that documents the current -m options. The caller is
2734 responsible for freeing the string. */
/* NOTE(review): this chunk is a symbol-index extraction of the real
   source; the lines declaring locals such as num, i, j, len, line_len,
   sep_len and isa_other, most braces, and several if/case headers were
   dropped.  The comments added below describe only what the visible
   lines establish -- confirm every detail against the complete file.  */
/* From the visible signature fragments: ISA is the mask of enabled ISA
   options, FLAGS the mask of other target flags, ARCH and TUNE the
   -march=/-mtune= names, FPMATH the FP math unit selection; a final
   add_nl_p parameter (declaration dropped here) apparently requests
   line wrapping of the result.  */
2737 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2738 const char *tune
, enum fpmath_unit fpmath
,
/* Local (option string, mask) pair type used by the two tables below.  */
2741 struct ix86_target_opts
2743 const char *option
; /* option string */
2744 HOST_WIDE_INT mask
; /* isa mask options */
2747 /* This table is ordered so that options like -msse4.2 that imply
2748 preceding options will match those first. */
2749 static struct ix86_target_opts isa_opts
[] =
2751 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2752 { "-mfma", OPTION_MASK_ISA_FMA
},
2753 { "-mxop", OPTION_MASK_ISA_XOP
},
2754 { "-mlwp", OPTION_MASK_ISA_LWP
},
2755 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2756 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2757 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2758 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2759 { "-msse3", OPTION_MASK_ISA_SSE3
},
2760 { "-msse2", OPTION_MASK_ISA_SSE2
},
2761 { "-msse", OPTION_MASK_ISA_SSE
},
2762 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2763 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2764 { "-mmmx", OPTION_MASK_ISA_MMX
},
2765 { "-mabm", OPTION_MASK_ISA_ABM
},
2766 { "-mbmi", OPTION_MASK_ISA_BMI
},
2767 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2768 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2769 { "-mhle", OPTION_MASK_ISA_HLE
},
2770 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2771 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2772 { "-madx", OPTION_MASK_ISA_ADX
},
2773 { "-mtbm", OPTION_MASK_ISA_TBM
},
2774 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2775 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2776 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2777 { "-maes", OPTION_MASK_ISA_AES
},
2778 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2779 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2780 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2781 { "-mf16c", OPTION_MASK_ISA_F16C
},
2782 { "-mrtm", OPTION_MASK_ISA_RTM
},
/* Non-ISA target-flag options, tested against FLAGS below.  */
2786 static struct ix86_target_opts flag_opts
[] =
2788 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2789 { "-m80387", MASK_80387
},
2790 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2791 { "-malign-double", MASK_ALIGN_DOUBLE
},
2792 { "-mcld", MASK_CLD
},
2793 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2794 { "-mieee-fp", MASK_IEEE_FP
},
2795 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2796 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2797 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2798 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2799 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2800 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2801 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2802 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2803 { "-mrecip", MASK_RECIP
},
2804 { "-mrtd", MASK_RTD
},
2805 { "-msseregparm", MASK_SSEREGPARM
},
2806 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2807 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2808 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2809 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2810 { "-mvzeroupper", MASK_VZEROUPPER
},
2811 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2812 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2813 { "-mprefer-avx128", MASK_PREFER_AVX128
},
/* Collected (option, argument) string pairs: one slot per entry of the
   two tables above plus 6 extra slots (-march=, -mtune=, the ABI
   switch, -mfpmath=, and the two "other isa/flags" leftovers).  */
2816 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2819 char target_other
[40];
/* Start with an empty pair table.  */
2829 memset (opts
, '\0', sizeof (opts
));
2831 /* Add -march= option. */
2834 opts
[num
][0] = "-march=";
2835 opts
[num
++][1] = arch
;
2838 /* Add -mtune= option. */
2841 opts
[num
][0] = "-mtune=";
2842 opts
[num
++][1] = tune
;
2845 /* Add -m32/-m64/-mx32. */
2846 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2848 if ((isa
& OPTION_MASK_ABI_64
) != 0)
/* The ABI bits are consumed here so they do not also show up in the
   generic "other isa" leftovers below.  */
2852 isa
&= ~ (OPTION_MASK_ISA_64BIT
2853 | OPTION_MASK_ABI_64
2854 | OPTION_MASK_ABI_X32
);
2858 opts
[num
++][0] = abi
;
2860 /* Pick out the options in isa options. */
2861 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2863 if ((isa
& isa_opts
[i
].mask
) != 0)
2865 opts
[num
++][0] = isa_opts
[i
].option
;
/* Clear each matched bit so any bits that remain afterwards are
   reported as "(other isa: ...)".  */
2866 isa
&= ~ isa_opts
[i
].mask
;
2870 if (isa
&& add_nl_p
)
2872 opts
[num
++][0] = isa_other
;
2873 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2877 /* Add flag options. */
2878 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2880 if ((flags
& flag_opts
[i
].mask
) != 0)
2882 opts
[num
++][0] = flag_opts
[i
].option
;
2883 flags
&= ~ flag_opts
[i
].mask
;
2887 if (flags
&& add_nl_p
)
2889 opts
[num
++][0] = target_other
;
2890 sprintf (target_other
, "(other flags: %#x)", flags
);
2893 /* Add -fpmath= option. */
2896 opts
[num
][0] = "-mfpmath=";
2897 switch ((int) fpmath
)
2900 opts
[num
++][1] = "387";
2904 opts
[num
++][1] = "sse";
2907 case FPMATH_387
| FPMATH_SSE
:
2908 opts
[num
++][1] = "sse+387";
/* Every entry written must fit inside the table sized above.  */
2920 gcc_assert (num
< ARRAY_SIZE (opts
));
2922 /* Size the string. */
/* Separator is 1 byte, or 3 when ADD_NL_P requests wrapping --
   presumably " \\\n"; the emitting code is partly dropped here.  */
2924 sep_len
= (add_nl_p
) ? 3 : 1;
2925 for (i
= 0; i
< num
; i
++)
2928 for (j
= 0; j
< 2; j
++)
2930 len
+= strlen (opts
[i
][j
]);
2933 /* Build the string. */
/* RET keeps the start of the xmalloc'd buffer (the return value);
   PTR advances as each piece is copied in.  */
2934 ret
= ptr
= (char *) xmalloc (len
);
2937 for (i
= 0; i
< num
; i
++)
2941 for (j
= 0; j
< 2; j
++)
2942 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
/* Wrap the line once it would exceed 70 columns.  */
2949 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2957 for (j
= 0; j
< 2; j
++)
2960 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2962 line_len
+= len2
[j
];
/* Verify the emitted bytes did not overrun the allocated buffer.  */
2967 gcc_assert (ret
+ len
>= ptr
);
2972 /* Return true, if profiling code should be emitted before
2973 prologue. Otherwise it returns false.
2974 Note: For x86 with "hotfix" it is sorried. */
2976 ix86_profile_before_prologue (void)
2978 return flag_fentry
!= 0;
2981 /* Function that is callable from the debugger to print the current
/* Builds the -m option string via ix86_target_string and prints it to
   stderr, or "<no options>" when none was produced.
   NOTE(review): several lines were dropped by this extraction -- the
   remaining call arguments (presumably ix86_fpmath and a true add_nl_p),
   the if (opts) test, and the free of the returned string.  Confirm
   against the complete file.  */
2984 ix86_debug_options (void)
2986 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2987 ix86_arch_string
, ix86_tune_string
,
2992 fprintf (stderr
, "%s\n\n", opts
);
2996 fputs ("<no options>\n\n", stderr
);
3001 /* Override various settings based on options. If MAIN_ARGS_P, the
3002 options are from the command line, otherwise they are from
3006 ix86_option_override_internal (bool main_args_p
)
3009 unsigned int ix86_arch_mask
, ix86_tune_mask
;
3010 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
3015 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3016 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3017 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3018 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3019 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3020 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3021 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3022 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3023 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3024 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3025 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3026 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3027 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3028 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3029 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3030 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3031 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3032 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3033 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3034 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3035 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3036 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3037 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3038 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3039 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3040 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3041 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3042 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3043 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3044 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3045 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3046 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3047 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3048 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3049 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3050 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3051 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3052 /* if this reaches 64, need to widen struct pta flags below */
3056 const char *const name
; /* processor name or nickname. */
3057 const enum processor_type processor
;
3058 const enum attr_cpu schedule
;
3059 const unsigned HOST_WIDE_INT flags
;
3061 const processor_alias_table
[] =
3063 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3064 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3065 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3066 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3067 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3068 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3069 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3070 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3071 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
3072 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3073 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3074 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
3075 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3077 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3079 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3080 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3081 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3082 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
3083 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3084 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3085 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3086 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
3087 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3088 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3089 | PTA_CX16
| PTA_NO_SAHF
},
3090 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
3091 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3092 | PTA_SSSE3
| PTA_CX16
},
3093 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3094 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3095 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
3096 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3097 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3098 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3099 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
3100 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
3101 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3102 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3103 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3104 | PTA_RDRND
| PTA_F16C
},
3105 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
3106 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3107 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3108 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3109 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3110 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
},
3111 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3112 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3113 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3114 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3115 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3116 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3117 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3118 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3119 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3120 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3121 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3122 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3123 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3124 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3125 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3126 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3127 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3128 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3129 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3130 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3131 {"k8", PROCESSOR_K8
, CPU_K8
,
3132 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3133 | PTA_SSE2
| PTA_NO_SAHF
},
3134 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3135 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3136 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3137 {"opteron", PROCESSOR_K8
, CPU_K8
,
3138 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3139 | PTA_SSE2
| PTA_NO_SAHF
},
3140 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3141 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3142 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3143 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3144 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3145 | PTA_SSE2
| PTA_NO_SAHF
},
3146 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3147 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3148 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3149 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3150 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3151 | PTA_SSE2
| PTA_NO_SAHF
},
3152 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3153 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3154 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3155 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3156 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3157 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3158 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3159 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3160 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3161 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3162 | PTA_XOP
| PTA_LWP
},
3163 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3164 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3165 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3166 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3167 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3169 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3170 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3171 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3172 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3173 PTA_HLE
/* flags are only used for -march switch. */ },
3174 {"btver2", PROCESSOR_BTVER2
, CPU_GENERIC64
,
3175 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3176 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3177 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3178 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
},
3179 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3181 | PTA_HLE
/* flags are only used for -march switch. */ },
3184 /* -mrecip options. */
3187 const char *string
; /* option name */
3188 unsigned int mask
; /* mask bits to set */
3190 const recip_options
[] =
3192 { "all", RECIP_MASK_ALL
},
3193 { "none", RECIP_MASK_NONE
},
3194 { "div", RECIP_MASK_DIV
},
3195 { "sqrt", RECIP_MASK_SQRT
},
3196 { "vec-div", RECIP_MASK_VEC_DIV
},
3197 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3200 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3202 /* Set up prefix/suffix so the error messages refer to either the command
3203 line argument, or the attribute(target). */
3212 prefix
= "option(\"";
3217 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3218 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3219 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3220 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3221 #ifdef TARGET_BI_ARCH
3224 #if TARGET_BI_ARCH == 1
3225 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3226 is on and OPTION_MASK_ABI_X32 is off. We turn off
3227 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3230 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3232 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3233 on and OPTION_MASK_ABI_64 is off. We turn off
3234 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3237 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3244 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3245 OPTION_MASK_ABI_64 for TARGET_X32. */
3246 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3247 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3249 else if (TARGET_LP64
)
3251 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3252 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3253 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3254 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3257 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3258 SUBTARGET_OVERRIDE_OPTIONS
;
3261 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3262 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3265 /* -fPIC is the default for x86_64. */
3266 if (TARGET_MACHO
&& TARGET_64BIT
)
3269 /* Need to check -mtune=generic first. */
3270 if (ix86_tune_string
)
3272 if (!strcmp (ix86_tune_string
, "generic")
3273 || !strcmp (ix86_tune_string
, "i686")
3274 /* As special support for cross compilers we read -mtune=native
3275 as -mtune=generic. With native compilers we won't see the
3276 -mtune=native, as it was changed by the driver. */
3277 || !strcmp (ix86_tune_string
, "native"))
3280 ix86_tune_string
= "generic64";
3282 ix86_tune_string
= "generic32";
3284 /* If this call is for setting the option attribute, allow the
3285 generic32/generic64 that was previously set. */
3286 else if (!main_args_p
3287 && (!strcmp (ix86_tune_string
, "generic32")
3288 || !strcmp (ix86_tune_string
, "generic64")))
3290 else if (!strncmp (ix86_tune_string
, "generic", 7))
3291 error ("bad value (%s) for %stune=%s %s",
3292 ix86_tune_string
, prefix
, suffix
, sw
);
3293 else if (!strcmp (ix86_tune_string
, "x86-64"))
3294 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3295 "%stune=k8%s or %stune=generic%s instead as appropriate",
3296 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3300 if (ix86_arch_string
)
3301 ix86_tune_string
= ix86_arch_string
;
3302 if (!ix86_tune_string
)
3304 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3305 ix86_tune_defaulted
= 1;
3308 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3309 need to use a sensible tune option. */
3310 if (!strcmp (ix86_tune_string
, "generic")
3311 || !strcmp (ix86_tune_string
, "x86-64")
3312 || !strcmp (ix86_tune_string
, "i686"))
3315 ix86_tune_string
= "generic64";
3317 ix86_tune_string
= "generic32";
3321 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3323 /* rep; movq isn't available in 32-bit code. */
3324 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3325 ix86_stringop_alg
= no_stringop
;
3328 if (!ix86_arch_string
)
3329 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3331 ix86_arch_specified
= 1;
3333 if (global_options_set
.x_ix86_pmode
)
3335 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3336 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3337 error ("address mode %qs not supported in the %s bit mode",
3338 TARGET_64BIT
? "short" : "long",
3339 TARGET_64BIT
? "64" : "32");
3342 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3344 if (!global_options_set
.x_ix86_abi
)
3345 ix86_abi
= DEFAULT_ABI
;
3347 if (global_options_set
.x_ix86_cmodel
)
3349 switch (ix86_cmodel
)
3354 ix86_cmodel
= CM_SMALL_PIC
;
3356 error ("code model %qs not supported in the %s bit mode",
3363 ix86_cmodel
= CM_MEDIUM_PIC
;
3365 error ("code model %qs not supported in the %s bit mode",
3367 else if (TARGET_X32
)
3368 error ("code model %qs not supported in x32 mode",
3375 ix86_cmodel
= CM_LARGE_PIC
;
3377 error ("code model %qs not supported in the %s bit mode",
3379 else if (TARGET_X32
)
3380 error ("code model %qs not supported in x32 mode",
3386 error ("code model %s does not support PIC mode", "32");
3388 error ("code model %qs not supported in the %s bit mode",
3395 error ("code model %s does not support PIC mode", "kernel");
3396 ix86_cmodel
= CM_32
;
3399 error ("code model %qs not supported in the %s bit mode",
3409 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3410 use of rip-relative addressing. This eliminates fixups that
3411 would otherwise be needed if this object is to be placed in a
3412 DLL, and is essentially just as efficient as direct addressing. */
3413 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3414 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3415 else if (TARGET_64BIT
)
3416 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3418 ix86_cmodel
= CM_32
;
3420 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3422 error ("-masm=intel not supported in this configuration");
3423 ix86_asm_dialect
= ASM_ATT
;
3425 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3426 sorry ("%i-bit mode not compiled in",
3427 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3429 for (i
= 0; i
< pta_size
; i
++)
3430 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3432 ix86_schedule
= processor_alias_table
[i
].schedule
;
3433 ix86_arch
= processor_alias_table
[i
].processor
;
3434 /* Default cpu tuning to the architecture. */
3435 ix86_tune
= ix86_arch
;
3437 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3438 error ("CPU you selected does not support x86-64 "
3441 if (processor_alias_table
[i
].flags
& PTA_MMX
3442 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3443 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3444 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3445 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3446 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3447 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3448 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3449 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3450 if (processor_alias_table
[i
].flags
& PTA_SSE
3451 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3452 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3453 if (processor_alias_table
[i
].flags
& PTA_SSE2
3454 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3455 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3456 if (processor_alias_table
[i
].flags
& PTA_SSE3
3457 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3458 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3459 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3460 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3461 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3462 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3463 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3464 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3465 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3466 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3467 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3468 if (processor_alias_table
[i
].flags
& PTA_AVX
3469 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3470 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3471 if (processor_alias_table
[i
].flags
& PTA_AVX2
3472 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3473 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3474 if (processor_alias_table
[i
].flags
& PTA_FMA
3475 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3476 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3477 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3478 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3479 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3480 if (processor_alias_table
[i
].flags
& PTA_FMA4
3481 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3482 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3483 if (processor_alias_table
[i
].flags
& PTA_XOP
3484 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3485 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3486 if (processor_alias_table
[i
].flags
& PTA_LWP
3487 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3488 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3489 if (processor_alias_table
[i
].flags
& PTA_ABM
3490 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3491 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3492 if (processor_alias_table
[i
].flags
& PTA_BMI
3493 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3494 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3495 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3496 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3497 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3498 if (processor_alias_table
[i
].flags
& PTA_TBM
3499 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3500 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3501 if (processor_alias_table
[i
].flags
& PTA_BMI2
3502 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3503 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3504 if (processor_alias_table
[i
].flags
& PTA_CX16
3505 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3506 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3507 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3508 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3509 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3510 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3511 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3512 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3513 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3514 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3515 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3516 if (processor_alias_table
[i
].flags
& PTA_AES
3517 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3518 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3519 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3520 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3521 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3522 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3523 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3524 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3525 if (processor_alias_table
[i
].flags
& PTA_RDRND
3526 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3527 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3528 if (processor_alias_table
[i
].flags
& PTA_F16C
3529 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3530 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3531 if (processor_alias_table
[i
].flags
& PTA_RTM
3532 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3533 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3534 if (processor_alias_table
[i
].flags
& PTA_HLE
3535 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3536 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3537 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3538 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3539 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3540 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3541 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3542 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3543 if (processor_alias_table
[i
].flags
& PTA_ADX
3544 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3545 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3546 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3547 x86_prefetch_sse
= true;
3552 if (!strcmp (ix86_arch_string
, "generic"))
3553 error ("generic CPU can be used only for %stune=%s %s",
3554 prefix
, suffix
, sw
);
3555 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3556 error ("bad value (%s) for %sarch=%s %s",
3557 ix86_arch_string
, prefix
, suffix
, sw
);
3559 ix86_arch_mask
= 1u << ix86_arch
;
3560 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3561 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3563 for (i
= 0; i
< pta_size
; i
++)
3564 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3566 ix86_schedule
= processor_alias_table
[i
].schedule
;
3567 ix86_tune
= processor_alias_table
[i
].processor
;
3570 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3572 if (ix86_tune_defaulted
)
3574 ix86_tune_string
= "x86-64";
3575 for (i
= 0; i
< pta_size
; i
++)
3576 if (! strcmp (ix86_tune_string
,
3577 processor_alias_table
[i
].name
))
3579 ix86_schedule
= processor_alias_table
[i
].schedule
;
3580 ix86_tune
= processor_alias_table
[i
].processor
;
3583 error ("CPU you selected does not support x86-64 "
3589 /* Adjust tuning when compiling for 32-bit ABI. */
3592 case PROCESSOR_GENERIC64
:
3593 ix86_tune
= PROCESSOR_GENERIC32
;
3594 ix86_schedule
= CPU_PENTIUMPRO
;
3597 case PROCESSOR_CORE2_64
:
3598 ix86_tune
= PROCESSOR_CORE2_32
;
3601 case PROCESSOR_COREI7_64
:
3602 ix86_tune
= PROCESSOR_COREI7_32
;
3609 /* Intel CPUs have always interpreted SSE prefetch instructions as
3610 NOPs; so, we can enable SSE prefetch instructions even when
3611 -mtune (rather than -march) points us to a processor that has them.
3612 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3613 higher processors. */
3615 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3616 x86_prefetch_sse
= true;
3620 if (ix86_tune_specified
&& i
== pta_size
)
3621 error ("bad value (%s) for %stune=%s %s",
3622 ix86_tune_string
, prefix
, suffix
, sw
);
3624 ix86_tune_mask
= 1u << ix86_tune
;
3625 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3626 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3628 #ifndef USE_IX86_FRAME_POINTER
3629 #define USE_IX86_FRAME_POINTER 0
3632 #ifndef USE_X86_64_FRAME_POINTER
3633 #define USE_X86_64_FRAME_POINTER 0
3636 /* Set the default values for switches whose default depends on TARGET_64BIT
3637 in case they weren't overwritten by command line options. */
3640 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3641 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3642 if (flag_asynchronous_unwind_tables
== 2)
3643 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3644 if (flag_pcc_struct_return
== 2)
3645 flag_pcc_struct_return
= 0;
3649 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3650 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3651 if (flag_asynchronous_unwind_tables
== 2)
3652 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3653 if (flag_pcc_struct_return
== 2)
3654 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3657 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3659 ix86_cost
= &ix86_size_cost
;
3661 ix86_cost
= ix86_tune_cost
;
3663 /* Arrange to set up i386_stack_locals for all functions. */
3664 init_machine_status
= ix86_init_machine_status
;
3666 /* Validate -mregparm= value. */
3667 if (global_options_set
.x_ix86_regparm
)
3670 warning (0, "-mregparm is ignored in 64-bit mode");
3671 if (ix86_regparm
> REGPARM_MAX
)
3673 error ("-mregparm=%d is not between 0 and %d",
3674 ix86_regparm
, REGPARM_MAX
);
3679 ix86_regparm
= REGPARM_MAX
;
3681 /* Default align_* from the processor table. */
3682 if (align_loops
== 0)
3684 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3685 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3687 if (align_jumps
== 0)
3689 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3690 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3692 if (align_functions
== 0)
3694 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3697 /* Provide default for -mbranch-cost= value. */
3698 if (!global_options_set
.x_ix86_branch_cost
)
3699 ix86_branch_cost
= ix86_cost
->branch_cost
;
3703 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3705 /* Enable by default the SSE and MMX builtins. Do allow the user to
3706 explicitly disable any of these. In particular, disabling SSE and
3707 MMX for kernel code is extremely useful. */
3708 if (!ix86_arch_specified
)
3710 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3711 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3714 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3718 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3720 if (!ix86_arch_specified
)
3722 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3724 /* i386 ABI does not specify red zone. It still makes sense to use it
3725 when programmer takes care to stack from being destroyed. */
3726 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3727 target_flags
|= MASK_NO_RED_ZONE
;
3730 /* Keep nonleaf frame pointers. */
3731 if (flag_omit_frame_pointer
)
3732 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3733 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3734 flag_omit_frame_pointer
= 1;
3736 /* If we're doing fast math, we don't care about comparison order
3737 wrt NaNs. This lets us use a shorter comparison sequence. */
3738 if (flag_finite_math_only
)
3739 target_flags
&= ~MASK_IEEE_FP
;
3741 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3742 since the insns won't need emulation. */
3743 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3744 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3746 /* Likewise, if the target doesn't have a 387, or we've specified
3747 software floating point, don't use 387 inline intrinsics. */
3749 target_flags
|= MASK_NO_FANCY_MATH_387
;
3751 /* Turn on MMX builtins for -msse. */
3753 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3755 /* Enable SSE prefetch. */
3756 if (TARGET_SSE
|| TARGET_PRFCHW
)
3757 x86_prefetch_sse
= true;
3759 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3760 if (TARGET_SSE4_2
|| TARGET_ABM
)
3761 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3763 /* Turn on lzcnt instruction for -mabm. */
3765 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3767 /* Validate -mpreferred-stack-boundary= value or default it to
3768 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3769 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3770 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3772 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3773 int max
= (TARGET_SEH
? 4 : 12);
3775 if (ix86_preferred_stack_boundary_arg
< min
3776 || ix86_preferred_stack_boundary_arg
> max
)
3779 error ("-mpreferred-stack-boundary is not supported "
3782 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3783 ix86_preferred_stack_boundary_arg
, min
, max
);
3786 ix86_preferred_stack_boundary
3787 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3790 /* Set the default value for -mstackrealign. */
3791 if (ix86_force_align_arg_pointer
== -1)
3792 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3794 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3796 /* Validate -mincoming-stack-boundary= value or default it to
3797 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3798 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3799 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3801 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3802 || ix86_incoming_stack_boundary_arg
> 12)
3803 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3804 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3807 ix86_user_incoming_stack_boundary
3808 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3809 ix86_incoming_stack_boundary
3810 = ix86_user_incoming_stack_boundary
;
3814 /* Accept -msseregparm only if at least SSE support is enabled. */
3815 if (TARGET_SSEREGPARM
3817 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3819 if (global_options_set
.x_ix86_fpmath
)
3821 if (ix86_fpmath
& FPMATH_SSE
)
3825 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3826 ix86_fpmath
= FPMATH_387
;
3828 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3830 warning (0, "387 instruction set disabled, using SSE arithmetics");
3831 ix86_fpmath
= FPMATH_SSE
;
3836 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3838 /* If the i387 is disabled, then do not return values in it. */
3840 target_flags
&= ~MASK_FLOAT_RETURNS
;
3842 /* Use external vectorized library in vectorizing intrinsics. */
3843 if (global_options_set
.x_ix86_veclibabi_type
)
3844 switch (ix86_veclibabi_type
)
3846 case ix86_veclibabi_type_svml
:
3847 ix86_veclib_handler
= ix86_veclibabi_svml
;
3850 case ix86_veclibabi_type_acml
:
3851 ix86_veclib_handler
= ix86_veclibabi_acml
;
3858 if ((!USE_IX86_FRAME_POINTER
3859 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3860 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3862 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3864 /* ??? Unwind info is not correct around the CFG unless either a frame
3865 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3866 unwind info generation to be aware of the CFG and propagating states
3868 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3869 || flag_exceptions
|| flag_non_call_exceptions
)
3870 && flag_omit_frame_pointer
3871 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3873 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3874 warning (0, "unwind tables currently require either a frame pointer "
3875 "or %saccumulate-outgoing-args%s for correctness",
3877 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3880 /* If stack probes are required, the space used for large function
3881 arguments on the stack must also be probed, so enable
3882 -maccumulate-outgoing-args so this happens in the prologue. */
3883 if (TARGET_STACK_PROBE
3884 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3886 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3887 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3888 "for correctness", prefix
, suffix
);
3889 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3892 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3895 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3896 p
= strchr (internal_label_prefix
, 'X');
3897 internal_label_prefix_len
= p
- internal_label_prefix
;
3901 /* When scheduling description is not available, disable scheduler pass
3902 so it won't slow down the compilation and make x87 code slower. */
3903 if (!TARGET_SCHEDULE
)
3904 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3906 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3907 ix86_tune_cost
->simultaneous_prefetches
,
3908 global_options
.x_param_values
,
3909 global_options_set
.x_param_values
);
3910 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3911 ix86_tune_cost
->prefetch_block
,
3912 global_options
.x_param_values
,
3913 global_options_set
.x_param_values
);
3914 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3915 ix86_tune_cost
->l1_cache_size
,
3916 global_options
.x_param_values
,
3917 global_options_set
.x_param_values
);
3918 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3919 ix86_tune_cost
->l2_cache_size
,
3920 global_options
.x_param_values
,
3921 global_options_set
.x_param_values
);
3923 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3924 if (flag_prefetch_loop_arrays
< 0
3927 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3928 flag_prefetch_loop_arrays
= 1;
3930 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3931 can be optimized to ap = __builtin_next_arg (0). */
3932 if (!TARGET_64BIT
&& !flag_split_stack
)
3933 targetm
.expand_builtin_va_start
= NULL
;
3937 ix86_gen_leave
= gen_leave_rex64
;
3938 if (Pmode
== DImode
)
3940 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3941 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3942 ix86_gen_tls_local_dynamic_base_64
3943 = gen_tls_local_dynamic_base_64_di
;
3947 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3948 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3949 ix86_gen_tls_local_dynamic_base_64
3950 = gen_tls_local_dynamic_base_64_si
;
3955 ix86_gen_leave
= gen_leave
;
3956 ix86_gen_monitor
= gen_sse3_monitor
;
3959 if (Pmode
== DImode
)
3961 ix86_gen_add3
= gen_adddi3
;
3962 ix86_gen_sub3
= gen_subdi3
;
3963 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3964 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3965 ix86_gen_andsp
= gen_anddi3
;
3966 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3967 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3968 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3972 ix86_gen_add3
= gen_addsi3
;
3973 ix86_gen_sub3
= gen_subsi3
;
3974 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3975 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3976 ix86_gen_andsp
= gen_andsi3
;
3977 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3978 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3979 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3983 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3985 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3988 if (!TARGET_64BIT
&& flag_pic
)
3990 if (flag_fentry
> 0)
3991 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3995 else if (TARGET_SEH
)
3997 if (flag_fentry
== 0)
3998 sorry ("-mno-fentry isn%'t compatible with SEH");
4001 else if (flag_fentry
< 0)
4003 #if defined(PROFILE_BEFORE_PROLOGUE)
4012 /* When not optimize for size, enable vzeroupper optimization for
4013 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4014 AVX unaligned load/store. */
4017 if (flag_expensive_optimizations
4018 && !(target_flags_explicit
& MASK_VZEROUPPER
))
4019 target_flags
|= MASK_VZEROUPPER
;
4020 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
4021 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4022 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4023 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
4024 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4025 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4026 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
4027 if (TARGET_AVX128_OPTIMAL
&& !(target_flags_explicit
& MASK_PREFER_AVX128
))
4028 target_flags
|= MASK_PREFER_AVX128
;
4033 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4034 target_flags
&= ~MASK_VZEROUPPER
;
4037 if (ix86_recip_name
)
4039 char *p
= ASTRDUP (ix86_recip_name
);
4041 unsigned int mask
, i
;
4044 while ((q
= strtok (p
, ",")) != NULL
)
4055 if (!strcmp (q
, "default"))
4056 mask
= RECIP_MASK_ALL
;
4059 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4060 if (!strcmp (q
, recip_options
[i
].string
))
4062 mask
= recip_options
[i
].mask
;
4066 if (i
== ARRAY_SIZE (recip_options
))
4068 error ("unknown option for -mrecip=%s", q
);
4070 mask
= RECIP_MASK_NONE
;
4074 recip_mask_explicit
|= mask
;
4076 recip_mask
&= ~mask
;
4083 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
4084 else if (target_flags_explicit
& MASK_RECIP
)
4085 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
4087 /* Save the initial options in case the user does function specific
4090 target_option_default_node
= target_option_current_node
4091 = build_target_option_node ();
4094 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4097 function_pass_avx256_p (const_rtx val
)
4102 if (REG_P (val
) && VALID_AVX256_REG_MODE (GET_MODE (val
)))
4105 if (GET_CODE (val
) == PARALLEL
)
4110 for (i
= XVECLEN (val
, 0) - 1; i
>= 0; i
--)
4112 r
= XVECEXP (val
, 0, i
);
4113 if (GET_CODE (r
) == EXPR_LIST
4115 && REG_P (XEXP (r
, 0))
4116 && (GET_MODE (XEXP (r
, 0)) == OImode
4117 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r
, 0)))))
4125 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4128 ix86_option_override (void)
4130 ix86_option_override_internal (true);
4133 /* Update register usage after having seen the compiler flags. */
4136 ix86_conditional_register_usage (void)
4141 /* The PIC register, if it exists, is fixed. */
4142 j
= PIC_OFFSET_TABLE_REGNUM
;
4143 if (j
!= INVALID_REGNUM
)
4144 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4146 /* For 32-bit targets, squash the REX registers. */
4149 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4150 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4151 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4152 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4155 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4156 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4157 : TARGET_64BIT
? (1 << 2)
4160 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4162 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4164 /* Set/reset conditionally defined registers from
4165 CALL_USED_REGISTERS initializer. */
4166 if (call_used_regs
[i
] > 1)
4167 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4169 /* Calculate registers of CLOBBERED_REGS register set
4170 as call used registers from GENERAL_REGS register set. */
4171 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4172 && call_used_regs
[i
])
4173 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4176 /* If MMX is disabled, squash the registers. */
4178 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4179 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4180 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4182 /* If SSE is disabled, squash the registers. */
4184 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4185 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4186 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4188 /* If the FPU is disabled, squash the registers. */
4189 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4190 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4191 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4192 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4196 /* Save the current options */
4199 ix86_function_specific_save (struct cl_target_option
*ptr
)
4201 ptr
->arch
= ix86_arch
;
4202 ptr
->schedule
= ix86_schedule
;
4203 ptr
->tune
= ix86_tune
;
4204 ptr
->branch_cost
= ix86_branch_cost
;
4205 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4206 ptr
->arch_specified
= ix86_arch_specified
;
4207 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4208 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4209 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4211 /* The fields are char but the variables are not; make sure the
4212 values fit in the fields. */
4213 gcc_assert (ptr
->arch
== ix86_arch
);
4214 gcc_assert (ptr
->schedule
== ix86_schedule
);
4215 gcc_assert (ptr
->tune
== ix86_tune
);
4216 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4219 /* Restore the current options */
4222 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4224 enum processor_type old_tune
= ix86_tune
;
4225 enum processor_type old_arch
= ix86_arch
;
4226 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4229 ix86_arch
= (enum processor_type
) ptr
->arch
;
4230 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4231 ix86_tune
= (enum processor_type
) ptr
->tune
;
4232 ix86_branch_cost
= ptr
->branch_cost
;
4233 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4234 ix86_arch_specified
= ptr
->arch_specified
;
4235 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4236 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4237 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4239 /* Recreate the arch feature tests if the arch changed */
4240 if (old_arch
!= ix86_arch
)
4242 ix86_arch_mask
= 1u << ix86_arch
;
4243 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4244 ix86_arch_features
[i
]
4245 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4248 /* Recreate the tune optimization tests */
4249 if (old_tune
!= ix86_tune
)
4251 ix86_tune_mask
= 1u << ix86_tune
;
4252 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4253 ix86_tune_features
[i
]
4254 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4258 /* Print the current options */
4261 ix86_function_specific_print (FILE *file
, int indent
,
4262 struct cl_target_option
*ptr
)
4265 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4266 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4268 fprintf (file
, "%*sarch = %d (%s)\n",
4271 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4272 ? cpu_names
[ptr
->arch
]
4275 fprintf (file
, "%*stune = %d (%s)\n",
4278 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4279 ? cpu_names
[ptr
->tune
]
4282 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4286 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4287 free (target_string
);
4292 /* Inner function to process the attribute((target(...))), take an argument and
4293 set the current options from the argument. If we have a list, recursively go
4297 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4298 struct gcc_options
*enum_opts_set
)
4303 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4304 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4305 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4306 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4307 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4323 enum ix86_opt_type type
;
4328 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4329 IX86_ATTR_ISA ("abm", OPT_mabm
),
4330 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4331 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4332 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4333 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4334 IX86_ATTR_ISA ("aes", OPT_maes
),
4335 IX86_ATTR_ISA ("avx", OPT_mavx
),
4336 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4337 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4338 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4339 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4340 IX86_ATTR_ISA ("sse", OPT_msse
),
4341 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4342 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4343 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4344 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4345 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4346 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4347 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4348 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4349 IX86_ATTR_ISA ("fma", OPT_mfma
),
4350 IX86_ATTR_ISA ("xop", OPT_mxop
),
4351 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4352 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4353 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4354 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4355 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4356 IX86_ATTR_ISA ("hle", OPT_mhle
),
4357 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4358 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4359 IX86_ATTR_ISA ("adx", OPT_madx
),
4362 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4364 /* string options */
4365 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4366 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4369 IX86_ATTR_YES ("cld",
4373 IX86_ATTR_NO ("fancy-math-387",
4374 OPT_mfancy_math_387
,
4375 MASK_NO_FANCY_MATH_387
),
4377 IX86_ATTR_YES ("ieee-fp",
4381 IX86_ATTR_YES ("inline-all-stringops",
4382 OPT_minline_all_stringops
,
4383 MASK_INLINE_ALL_STRINGOPS
),
4385 IX86_ATTR_YES ("inline-stringops-dynamically",
4386 OPT_minline_stringops_dynamically
,
4387 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4389 IX86_ATTR_NO ("align-stringops",
4390 OPT_mno_align_stringops
,
4391 MASK_NO_ALIGN_STRINGOPS
),
4393 IX86_ATTR_YES ("recip",
4399 /* If this is a list, recurse to get the options. */
4400 if (TREE_CODE (args
) == TREE_LIST
)
4404 for (; args
; args
= TREE_CHAIN (args
))
4405 if (TREE_VALUE (args
)
4406 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4407 p_strings
, enum_opts_set
))
4413 else if (TREE_CODE (args
) != STRING_CST
)
4416 /* Handle multiple arguments separated by commas. */
4417 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4419 while (next_optstr
&& *next_optstr
!= '\0')
4421 char *p
= next_optstr
;
4423 char *comma
= strchr (next_optstr
, ',');
4424 const char *opt_string
;
4425 size_t len
, opt_len
;
4430 enum ix86_opt_type type
= ix86_opt_unknown
;
4436 len
= comma
- next_optstr
;
4437 next_optstr
= comma
+ 1;
4445 /* Recognize no-xxx. */
4446 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4455 /* Find the option. */
4458 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4460 type
= attrs
[i
].type
;
4461 opt_len
= attrs
[i
].len
;
4462 if (ch
== attrs
[i
].string
[0]
4463 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4466 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4469 mask
= attrs
[i
].mask
;
4470 opt_string
= attrs
[i
].string
;
4475 /* Process the option. */
4478 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4482 else if (type
== ix86_opt_isa
)
4484 struct cl_decoded_option decoded
;
4486 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4487 ix86_handle_option (&global_options
, &global_options_set
,
4488 &decoded
, input_location
);
4491 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4493 if (type
== ix86_opt_no
)
4494 opt_set_p
= !opt_set_p
;
4497 target_flags
|= mask
;
4499 target_flags
&= ~mask
;
4502 else if (type
== ix86_opt_str
)
4506 error ("option(\"%s\") was already specified", opt_string
);
4510 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4513 else if (type
== ix86_opt_enum
)
4518 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4520 set_option (&global_options
, enum_opts_set
, opt
, value
,
4521 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4525 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4537 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4540 ix86_valid_target_attribute_tree (tree args
)
4542 const char *orig_arch_string
= ix86_arch_string
;
4543 const char *orig_tune_string
= ix86_tune_string
;
4544 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4545 int orig_tune_defaulted
= ix86_tune_defaulted
;
4546 int orig_arch_specified
= ix86_arch_specified
;
4547 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4550 struct cl_target_option
*def
4551 = TREE_TARGET_OPTION (target_option_default_node
);
4552 struct gcc_options enum_opts_set
;
4554 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4556 /* Process each of the options on the chain. */
4557 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4561 /* If the changed options are different from the default, rerun
4562 ix86_option_override_internal, and then save the options away.
4563 The string options are are attribute options, and will be undone
4564 when we copy the save structure. */
4565 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4566 || target_flags
!= def
->x_target_flags
4567 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4568 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4569 || enum_opts_set
.x_ix86_fpmath
)
4571 /* If we are using the default tune= or arch=, undo the string assigned,
4572 and use the default. */
4573 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4574 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4575 else if (!orig_arch_specified
)
4576 ix86_arch_string
= NULL
;
4578 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4579 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4580 else if (orig_tune_defaulted
)
4581 ix86_tune_string
= NULL
;
4583 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4584 if (enum_opts_set
.x_ix86_fpmath
)
4585 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4586 else if (!TARGET_64BIT
&& TARGET_SSE
)
4588 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4589 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4592 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4593 ix86_option_override_internal (false);
4595 /* Add any builtin functions with the new isa if any. */
4596 ix86_add_new_builtins (ix86_isa_flags
);
4598 /* Save the current options unless we are validating options for
4600 t
= build_target_option_node ();
4602 ix86_arch_string
= orig_arch_string
;
4603 ix86_tune_string
= orig_tune_string
;
4604 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4606 /* Free up memory allocated to hold the strings */
4607 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4608 free (option_strings
[i
]);
4614 /* Hook to validate attribute((target("string"))). */
4617 ix86_valid_target_attribute_p (tree fndecl
,
4618 tree
ARG_UNUSED (name
),
4620 int ARG_UNUSED (flags
))
4622 struct cl_target_option cur_target
;
4624 tree old_optimize
= build_optimization_node ();
4625 tree new_target
, new_optimize
;
4626 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4628 /* If the function changed the optimization levels as well as setting target
4629 options, start with the optimizations specified. */
4630 if (func_optimize
&& func_optimize
!= old_optimize
)
4631 cl_optimization_restore (&global_options
,
4632 TREE_OPTIMIZATION (func_optimize
));
4634 /* The target attributes may also change some optimization flags, so update
4635 the optimization options if necessary. */
4636 cl_target_option_save (&cur_target
, &global_options
);
4637 new_target
= ix86_valid_target_attribute_tree (args
);
4638 new_optimize
= build_optimization_node ();
4645 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4647 if (old_optimize
!= new_optimize
)
4648 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4651 cl_target_option_restore (&global_options
, &cur_target
);
4653 if (old_optimize
!= new_optimize
)
4654 cl_optimization_restore (&global_options
,
4655 TREE_OPTIMIZATION (old_optimize
));
4661 /* Hook to determine if one function can safely inline another. */
4664 ix86_can_inline_p (tree caller
, tree callee
)
4667 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4668 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4670 /* If callee has no option attributes, then it is ok to inline. */
4674 /* If caller has no option attributes, but callee does then it is not ok to
4676 else if (!caller_tree
)
4681 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4682 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4684 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4685 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4687 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4688 != callee_opts
->x_ix86_isa_flags
)
4691 /* See if we have the same non-isa options. */
4692 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4695 /* See if arch, tune, etc. are the same. */
4696 else if (caller_opts
->arch
!= callee_opts
->arch
)
4699 else if (caller_opts
->tune
!= callee_opts
->tune
)
4702 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4705 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4716 /* Remember the last target of ix86_set_current_function. */
4717 static GTY(()) tree ix86_previous_fndecl
;
4719 /* Establish appropriate back-end context for processing the function
4720 FNDECL. The argument might be NULL to indicate processing at top
4721 level, outside of any function scope. */
4723 ix86_set_current_function (tree fndecl
)
4725 /* Only change the context if the function changes. This hook is called
4726 several times in the course of compiling a function, and we don't want to
4727 slow things down too much or call target_reinit when it isn't safe. */
4728 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4730 tree old_tree
= (ix86_previous_fndecl
4731 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4734 tree new_tree
= (fndecl
4735 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4738 ix86_previous_fndecl
= fndecl
;
4739 if (old_tree
== new_tree
)
4744 cl_target_option_restore (&global_options
,
4745 TREE_TARGET_OPTION (new_tree
));
4751 struct cl_target_option
*def
4752 = TREE_TARGET_OPTION (target_option_current_node
);
4754 cl_target_option_restore (&global_options
, def
);
4761 /* Return true if this goes in large data/bss. */
4764 ix86_in_large_data_p (tree exp
)
4766 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4769 /* Functions are never large data. */
4770 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4773 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4775 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4776 if (strcmp (section
, ".ldata") == 0
4777 || strcmp (section
, ".lbss") == 0)
4783 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4785 /* If this is an incomplete type with size 0, then we can't put it
4786 in data because it might be too big when completed. */
4787 if (!size
|| size
> ix86_section_threshold
)
4794 /* Switch to the appropriate section for output of DECL.
4795 DECL is either a `VAR_DECL' node or a constant of some sort.
4796 RELOC indicates whether forming the initial value of DECL requires
4797 link-time relocations. */
4799 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4803 x86_64_elf_select_section (tree decl
, int reloc
,
4804 unsigned HOST_WIDE_INT align
)
4806 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4807 && ix86_in_large_data_p (decl
))
4809 const char *sname
= NULL
;
4810 unsigned int flags
= SECTION_WRITE
;
4811 switch (categorize_decl_for_section (decl
, reloc
))
4816 case SECCAT_DATA_REL
:
4817 sname
= ".ldata.rel";
4819 case SECCAT_DATA_REL_LOCAL
:
4820 sname
= ".ldata.rel.local";
4822 case SECCAT_DATA_REL_RO
:
4823 sname
= ".ldata.rel.ro";
4825 case SECCAT_DATA_REL_RO_LOCAL
:
4826 sname
= ".ldata.rel.ro.local";
4830 flags
|= SECTION_BSS
;
4833 case SECCAT_RODATA_MERGE_STR
:
4834 case SECCAT_RODATA_MERGE_STR_INIT
:
4835 case SECCAT_RODATA_MERGE_CONST
:
4839 case SECCAT_SRODATA
:
4846 /* We don't split these for medium model. Place them into
4847 default sections and hope for best. */
4852 /* We might get called with string constants, but get_named_section
4853 doesn't like them as they are not DECLs. Also, we need to set
4854 flags in that case. */
4856 return get_section (sname
, flags
, NULL
);
4857 return get_named_section (decl
, sname
, reloc
);
4860 return default_elf_select_section (decl
, reloc
, align
);
4863 /* Build up a unique section name, expressed as a
4864 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4865 RELOC indicates whether the initial value of EXP requires
4866 link-time relocations. */
4868 static void ATTRIBUTE_UNUSED
4869 x86_64_elf_unique_section (tree decl
, int reloc
)
4871 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4872 && ix86_in_large_data_p (decl
))
4874 const char *prefix
= NULL
;
4875 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4876 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4878 switch (categorize_decl_for_section (decl
, reloc
))
4881 case SECCAT_DATA_REL
:
4882 case SECCAT_DATA_REL_LOCAL
:
4883 case SECCAT_DATA_REL_RO
:
4884 case SECCAT_DATA_REL_RO_LOCAL
:
4885 prefix
= one_only
? ".ld" : ".ldata";
4888 prefix
= one_only
? ".lb" : ".lbss";
4891 case SECCAT_RODATA_MERGE_STR
:
4892 case SECCAT_RODATA_MERGE_STR_INIT
:
4893 case SECCAT_RODATA_MERGE_CONST
:
4894 prefix
= one_only
? ".lr" : ".lrodata";
4896 case SECCAT_SRODATA
:
4903 /* We don't split these for medium model. Place them into
4904 default sections and hope for best. */
4909 const char *name
, *linkonce
;
4912 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4913 name
= targetm
.strip_name_encoding (name
);
4915 /* If we're using one_only, then there needs to be a .gnu.linkonce
4916 prefix to the section name. */
4917 linkonce
= one_only
? ".gnu.linkonce" : "";
4919 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4921 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4925 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4950 /* Utility function for targets to use in implementing
4951 ASM_OUTPUT_ALIGNED_BSS. */
4954 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4955 const char *name
, unsigned HOST_WIDE_INT size
,
4958 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4959 && size
> (unsigned int)ix86_section_threshold
)
4960 switch_to_section (get_named_section (decl
, ".lbss", 0));
4962 switch_to_section (bss_section
);
4963 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4964 #ifdef ASM_DECLARE_OBJECT_NAME
4965 last_assemble_variable_decl
= decl
;
4966 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4968 /* Standard thing is just output label for the object. */
4969 ASM_OUTPUT_LABEL (file
, name
);
4970 #endif /* ASM_DECLARE_OBJECT_NAME */
4971 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4974 /* Decide whether we must probe the stack before any space allocation
4975 on this target. It's essentially TARGET_STACK_PROBE except when
4976 -fstack-check causes the stack to be already probed differently. */
4979 ix86_target_stack_probe (void)
4981 /* Do not probe the stack twice if static stack checking is enabled. */
4982 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4985 return TARGET_STACK_PROBE
;
4988 /* Decide whether we can make a sibling call to a function. DECL is the
4989 declaration of the function being targeted by the call and EXP is the
4990 CALL_EXPR representing the call. */
4993 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4995 tree type
, decl_or_type
;
4998 /* If we are generating position-independent code, we cannot sibcall
4999 optimize any indirect call, or a direct call to a global function,
5000 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5004 && (!decl
|| !targetm
.binds_local_p (decl
)))
5007 /* If we need to align the outgoing stack, then sibcalling would
5008 unalign the stack, which may break the called function. */
5009 if (ix86_minimum_incoming_stack_boundary (true)
5010 < PREFERRED_STACK_BOUNDARY
)
5015 decl_or_type
= decl
;
5016 type
= TREE_TYPE (decl
);
5020 /* We're looking at the CALL_EXPR, we need the type of the function. */
5021 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5022 type
= TREE_TYPE (type
); /* pointer type */
5023 type
= TREE_TYPE (type
); /* function type */
5024 decl_or_type
= type
;
5027 /* Check that the return value locations are the same. Like
5028 if we are returning floats on the 80387 register stack, we cannot
5029 make a sibcall from a function that doesn't return a float to a
5030 function that does or, conversely, from a function that does return
5031 a float to a function that doesn't; the necessary stack adjustment
5032 would not be executed. This is also the place we notice
5033 differences in the return value ABI. Note that it is ok for one
5034 of the functions to have void return type as long as the return
5035 value of the other is passed in a register. */
5036 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5037 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5039 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5041 if (!rtx_equal_p (a
, b
))
5044 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5046 /* Disable sibcall if we need to generate vzeroupper after
5048 if (TARGET_VZEROUPPER
5049 && cfun
->machine
->callee_return_avx256_p
5050 && !cfun
->machine
->caller_return_avx256_p
)
5053 else if (!rtx_equal_p (a
, b
))
5058 /* The SYSV ABI has more call-clobbered registers;
5059 disallow sibcalls from MS to SYSV. */
5060 if (cfun
->machine
->call_abi
== MS_ABI
5061 && ix86_function_type_abi (type
) == SYSV_ABI
)
5066 /* If this call is indirect, we'll need to be able to use a
5067 call-clobbered register for the address of the target function.
5068 Make sure that all such registers are not used for passing
5069 parameters. Note that DLLIMPORT functions are indirect. */
5071 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5073 if (ix86_function_regparm (type
, NULL
) >= 3)
5075 /* ??? Need to count the actual number of registers to be used,
5076 not the possible number of registers. Fix later. */
5082 /* Otherwise okay. That also includes certain types of indirect calls. */
5086 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5087 and "sseregparm" calling convention attributes;
5088 arguments as in struct attribute_spec.handler. */
5091 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5093 int flags ATTRIBUTE_UNUSED
,
5096 if (TREE_CODE (*node
) != FUNCTION_TYPE
5097 && TREE_CODE (*node
) != METHOD_TYPE
5098 && TREE_CODE (*node
) != FIELD_DECL
5099 && TREE_CODE (*node
) != TYPE_DECL
)
5101 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5103 *no_add_attrs
= true;
5107 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5108 if (is_attribute_p ("regparm", name
))
5112 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5114 error ("fastcall and regparm attributes are not compatible");
5117 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5119 error ("regparam and thiscall attributes are not compatible");
5122 cst
= TREE_VALUE (args
);
5123 if (TREE_CODE (cst
) != INTEGER_CST
)
5125 warning (OPT_Wattributes
,
5126 "%qE attribute requires an integer constant argument",
5128 *no_add_attrs
= true;
5130 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5132 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5134 *no_add_attrs
= true;
5142 /* Do not warn when emulating the MS ABI. */
5143 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5144 && TREE_CODE (*node
) != METHOD_TYPE
)
5145 || ix86_function_type_abi (*node
) != MS_ABI
)
5146 warning (OPT_Wattributes
, "%qE attribute ignored",
5148 *no_add_attrs
= true;
5152 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5153 if (is_attribute_p ("fastcall", name
))
5155 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5157 error ("fastcall and cdecl attributes are not compatible");
5159 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5161 error ("fastcall and stdcall attributes are not compatible");
5163 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5165 error ("fastcall and regparm attributes are not compatible");
5167 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5169 error ("fastcall and thiscall attributes are not compatible");
5173 /* Can combine stdcall with fastcall (redundant), regparm and
5175 else if (is_attribute_p ("stdcall", name
))
5177 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5179 error ("stdcall and cdecl attributes are not compatible");
5181 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5183 error ("stdcall and fastcall attributes are not compatible");
5185 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5187 error ("stdcall and thiscall attributes are not compatible");
5191 /* Can combine cdecl with regparm and sseregparm. */
5192 else if (is_attribute_p ("cdecl", name
))
5194 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5196 error ("stdcall and cdecl attributes are not compatible");
5198 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5200 error ("fastcall and cdecl attributes are not compatible");
5202 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5204 error ("cdecl and thiscall attributes are not compatible");
5207 else if (is_attribute_p ("thiscall", name
))
5209 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5210 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5212 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5214 error ("stdcall and thiscall attributes are not compatible");
5216 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5218 error ("fastcall and thiscall attributes are not compatible");
5220 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5222 error ("cdecl and thiscall attributes are not compatible");
5226 /* Can combine sseregparm with all attributes. */
5231 /* The transactional memory builtins are implicitly regparm or fastcall
5232 depending on the ABI. Override the generic do-nothing attribute that
5233 these builtins were declared with, and replace it with one of the two
5234 attributes that we expect elsewhere. */
5237 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5238 tree args ATTRIBUTE_UNUSED
,
5239 int flags ATTRIBUTE_UNUSED
,
5244 /* In no case do we want to add the placeholder attribute. */
5245 *no_add_attrs
= true;
5247 /* The 64-bit ABI is unchanged for transactional memory. */
5251 /* ??? Is there a better way to validate 32-bit windows? We have
5252 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5253 if (CHECK_STACK_LIMIT
> 0)
5254 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5257 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5258 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5260 decl_attributes (node
, alt
, flags
);
5265 /* This function determines from TYPE the calling-convention. */
5268 ix86_get_callcvt (const_tree type
)
5270 unsigned int ret
= 0;
5275 return IX86_CALLCVT_CDECL
;
5277 attrs
= TYPE_ATTRIBUTES (type
);
5278 if (attrs
!= NULL_TREE
)
5280 if (lookup_attribute ("cdecl", attrs
))
5281 ret
|= IX86_CALLCVT_CDECL
;
5282 else if (lookup_attribute ("stdcall", attrs
))
5283 ret
|= IX86_CALLCVT_STDCALL
;
5284 else if (lookup_attribute ("fastcall", attrs
))
5285 ret
|= IX86_CALLCVT_FASTCALL
;
5286 else if (lookup_attribute ("thiscall", attrs
))
5287 ret
|= IX86_CALLCVT_THISCALL
;
5289 /* Regparam isn't allowed for thiscall and fastcall. */
5290 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5292 if (lookup_attribute ("regparm", attrs
))
5293 ret
|= IX86_CALLCVT_REGPARM
;
5294 if (lookup_attribute ("sseregparm", attrs
))
5295 ret
|= IX86_CALLCVT_SSEREGPARM
;
5298 if (IX86_BASE_CALLCVT(ret
) != 0)
5302 is_stdarg
= stdarg_p (type
);
5303 if (TARGET_RTD
&& !is_stdarg
)
5304 return IX86_CALLCVT_STDCALL
| ret
;
5308 || TREE_CODE (type
) != METHOD_TYPE
5309 || ix86_function_type_abi (type
) != MS_ABI
)
5310 return IX86_CALLCVT_CDECL
| ret
;
5312 return IX86_CALLCVT_THISCALL
;
5315 /* Return 0 if the attributes for two types are incompatible, 1 if they
5316 are compatible, and 2 if they are nearly compatible (which causes a
5317 warning to be generated). */
5320 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5322 unsigned int ccvt1
, ccvt2
;
5324 if (TREE_CODE (type1
) != FUNCTION_TYPE
5325 && TREE_CODE (type1
) != METHOD_TYPE
)
5328 ccvt1
= ix86_get_callcvt (type1
);
5329 ccvt2
= ix86_get_callcvt (type2
);
5332 if (ix86_function_regparm (type1
, NULL
)
5333 != ix86_function_regparm (type2
, NULL
))
5339 /* Return the regparm value for a function with the indicated TYPE and DECL.
5340 DECL may be NULL when calling function indirectly
5341 or considering a libcall. */
5344 ix86_function_regparm (const_tree type
, const_tree decl
)
5351 return (ix86_function_type_abi (type
) == SYSV_ABI
5352 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5353 ccvt
= ix86_get_callcvt (type
);
5354 regparm
= ix86_regparm
;
5356 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5358 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5361 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5365 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5367 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5370 /* Use register calling convention for local functions when possible. */
5372 && TREE_CODE (decl
) == FUNCTION_DECL
5374 && !(profile_flag
&& !flag_fentry
))
5376 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5377 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5378 if (i
&& i
->local
&& i
->can_change_signature
)
5380 int local_regparm
, globals
= 0, regno
;
5382 /* Make sure no regparm register is taken by a
5383 fixed register variable. */
5384 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5385 if (fixed_regs
[local_regparm
])
5388 /* We don't want to use regparm(3) for nested functions as
5389 these use a static chain pointer in the third argument. */
5390 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5393 /* In 32-bit mode save a register for the split stack. */
5394 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5397 /* Each fixed register usage increases register pressure,
5398 so less registers should be used for argument passing.
5399 This functionality can be overriden by an explicit
5401 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5402 if (fixed_regs
[regno
])
5406 = globals
< local_regparm
? local_regparm
- globals
: 0;
5408 if (local_regparm
> regparm
)
5409 regparm
= local_regparm
;
5416 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5417 DFmode (2) arguments in SSE registers for a function with the
5418 indicated TYPE and DECL. DECL may be NULL when calling function
5419 indirectly or considering a libcall. Otherwise return 0. */
5422 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5424 gcc_assert (!TARGET_64BIT
);
5426 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5427 by the sseregparm attribute. */
5428 if (TARGET_SSEREGPARM
5429 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5436 error ("calling %qD with attribute sseregparm without "
5437 "SSE/SSE2 enabled", decl
);
5439 error ("calling %qT with attribute sseregparm without "
5440 "SSE/SSE2 enabled", type
);
5448 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5449 (and DFmode for SSE2) arguments in SSE registers. */
5450 if (decl
&& TARGET_SSE_MATH
&& optimize
5451 && !(profile_flag
&& !flag_fentry
))
5453 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5454 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5455 if (i
&& i
->local
&& i
->can_change_signature
)
5456 return TARGET_SSE2
? 2 : 1;
5462 /* Return true if EAX is live at the start of the function. Used by
5463 ix86_expand_prologue to determine if we need special help before
5464 calling allocate_stack_worker. */
5467 ix86_eax_live_at_start_p (void)
5469 /* Cheat. Don't bother working forward from ix86_function_regparm
5470 to the function type to whether an actual argument is located in
5471 eax. Instead just look at cfg info, which is still close enough
5472 to correct at this point. This gives false positives for broken
5473 functions that might use uninitialized data that happens to be
5474 allocated in eax, but who cares? */
5475 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5479 ix86_keep_aggregate_return_pointer (tree fntype
)
5485 attr
= lookup_attribute ("callee_pop_aggregate_return",
5486 TYPE_ATTRIBUTES (fntype
));
5488 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5490 /* For 32-bit MS-ABI the default is to keep aggregate
5492 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5495 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5498 /* Value is the number of bytes of arguments automatically
5499 popped when returning from a subroutine call.
5500 FUNDECL is the declaration node of the function (as a tree),
5501 FUNTYPE is the data type of the function (as a tree),
5502 or for a library call it is an identifier node for the subroutine name.
5503 SIZE is the number of bytes of arguments passed on the stack.
5505 On the 80386, the RTD insn may be used to pop them if the number
5506 of args is fixed, but if the number is variable then the caller
5507 must pop them all. RTD can't be used for library calls now
5508 because the library is compiled with the Unix compiler.
5509 Use of RTD is a selectable option, since it is incompatible with
5510 standard Unix calling sequences. If the option is not selected,
5511 the caller must always pop the args.
5513 The attribute stdcall is equivalent to RTD on a per module basis. */
5516 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5520 /* None of the 64-bit ABIs pop arguments. */
5524 ccvt
= ix86_get_callcvt (funtype
);
5526 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5527 | IX86_CALLCVT_THISCALL
)) != 0
5528 && ! stdarg_p (funtype
))
5531 /* Lose any fake structure return argument if it is passed on the stack. */
5532 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5533 && !ix86_keep_aggregate_return_pointer (funtype
))
5535 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5537 return GET_MODE_SIZE (Pmode
);
5543 /* Argument support functions. */
5545 /* Return true when register may be used to pass function parameters. */
5547 ix86_function_arg_regno_p (int regno
)
5550 const int *parm_regs
;
5555 return (regno
< REGPARM_MAX
5556 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5558 return (regno
< REGPARM_MAX
5559 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5560 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5561 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5562 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5567 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5572 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5573 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5577 /* TODO: The function should depend on current function ABI but
5578 builtins.c would need updating then. Therefore we use the
5581 /* RAX is used as hidden argument to va_arg functions. */
5582 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5585 if (ix86_abi
== MS_ABI
)
5586 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5588 parm_regs
= x86_64_int_parameter_registers
;
5589 for (i
= 0; i
< (ix86_abi
== MS_ABI
5590 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5591 if (regno
== parm_regs
[i
])
5596 /* Return if we do not know how to pass TYPE solely in registers. */
5599 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5601 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5604 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5605 The layout_type routine is crafty and tries to trick us into passing
5606 currently unsupported vector types on the stack by using TImode. */
5607 return (!TARGET_64BIT
&& mode
== TImode
5608 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5611 /* It returns the size, in bytes, of the area reserved for arguments passed
5612 in registers for the function represented by fndecl dependent to the used
5615 ix86_reg_parm_stack_space (const_tree fndecl
)
5617 enum calling_abi call_abi
= SYSV_ABI
;
5618 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5619 call_abi
= ix86_function_abi (fndecl
);
5621 call_abi
= ix86_function_type_abi (fndecl
);
5622 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5627 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5630 ix86_function_type_abi (const_tree fntype
)
5632 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5634 enum calling_abi abi
= ix86_abi
;
5635 if (abi
== SYSV_ABI
)
5637 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5640 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5648 ix86_function_ms_hook_prologue (const_tree fn
)
5650 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5652 if (decl_function_context (fn
) != NULL_TREE
)
5653 error_at (DECL_SOURCE_LOCATION (fn
),
5654 "ms_hook_prologue is not compatible with nested function");
5661 static enum calling_abi
5662 ix86_function_abi (const_tree fndecl
)
5666 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5669 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5672 ix86_cfun_abi (void)
5676 return cfun
->machine
->call_abi
;
5679 /* Write the extra assembler code needed to declare a function properly. */
5682 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5685 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5689 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5690 unsigned int filler_cc
= 0xcccccccc;
5692 for (i
= 0; i
< filler_count
; i
+= 4)
5693 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5696 #ifdef SUBTARGET_ASM_UNWIND_INIT
5697 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5700 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5702 /* Output magic byte marker, if hot-patch attribute is set. */
5707 /* leaq [%rsp + 0], %rsp */
5708 asm_fprintf (asm_out_file
, ASM_BYTE
5709 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5713 /* movl.s %edi, %edi
5715 movl.s %esp, %ebp */
5716 asm_fprintf (asm_out_file
, ASM_BYTE
5717 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5723 extern void init_regs (void);
5725 /* Implementation of call abi switching target hook. Specific to FNDECL
5726 the specific call register sets are set. See also
5727 ix86_conditional_register_usage for more details. */
5729 ix86_call_abi_override (const_tree fndecl
)
5731 if (fndecl
== NULL_TREE
)
5732 cfun
->machine
->call_abi
= ix86_abi
;
5734 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5737 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5738 expensive re-initialization of init_regs each time we switch function context
5739 since this is needed only during RTL expansion. */
5741 ix86_maybe_switch_abi (void)
5744 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5748 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5749 for a call to a function whose data type is FNTYPE.
5750 For a library call, FNTYPE is 0. */
5753 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5754 tree fntype
, /* tree ptr for function decl */
5755 rtx libname
, /* SYMBOL_REF of library name or 0 */
5759 struct cgraph_local_info
*i
;
5762 memset (cum
, 0, sizeof (*cum
));
5764 /* Initialize for the current callee. */
5767 cfun
->machine
->callee_pass_avx256_p
= false;
5768 cfun
->machine
->callee_return_avx256_p
= false;
5773 i
= cgraph_local_info (fndecl
);
5774 cum
->call_abi
= ix86_function_abi (fndecl
);
5775 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5780 cum
->call_abi
= ix86_function_type_abi (fntype
);
5782 fnret_type
= TREE_TYPE (fntype
);
5787 if (TARGET_VZEROUPPER
&& fnret_type
)
5789 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5791 if (function_pass_avx256_p (fnret_value
))
5793 /* The return value of this function uses 256bit AVX modes. */
5795 cfun
->machine
->callee_return_avx256_p
= true;
5797 cfun
->machine
->caller_return_avx256_p
= true;
5801 cum
->caller
= caller
;
5803 /* Set up the number of registers to use for passing arguments. */
5805 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5806 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5807 "or subtarget optimization implying it");
5808 cum
->nregs
= ix86_regparm
;
5811 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5812 ? X86_64_REGPARM_MAX
5813 : X86_64_MS_REGPARM_MAX
);
5817 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5820 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5821 ? X86_64_SSE_REGPARM_MAX
5822 : X86_64_MS_SSE_REGPARM_MAX
);
5826 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5827 cum
->warn_avx
= true;
5828 cum
->warn_sse
= true;
5829 cum
->warn_mmx
= true;
5831 /* Because type might mismatch in between caller and callee, we need to
5832 use actual type of function for local calls.
5833 FIXME: cgraph_analyze can be told to actually record if function uses
5834 va_start so for local functions maybe_vaarg can be made aggressive
5836 FIXME: once typesytem is fixed, we won't need this code anymore. */
5837 if (i
&& i
->local
&& i
->can_change_signature
)
5838 fntype
= TREE_TYPE (fndecl
);
5839 cum
->maybe_vaarg
= (fntype
5840 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5845 /* If there are variable arguments, then we won't pass anything
5846 in registers in 32-bit mode. */
5847 if (stdarg_p (fntype
))
5858 /* Use ecx and edx registers if function has fastcall attribute,
5859 else look for regparm information. */
5862 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5863 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5866 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5868 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5874 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5877 /* Set up the number of SSE registers used for passing SFmode
5878 and DFmode arguments. Warn for mismatching ABI. */
5879 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5883 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5884 But in the case of vector types, it is some vector mode.
5886 When we have only some of our vector isa extensions enabled, then there
5887 are some modes for which vector_mode_supported_p is false. For these
5888 modes, the generic vector support in gcc will choose some non-vector mode
5889 in order to implement the type. By computing the natural mode, we'll
5890 select the proper ABI location for the operand and not depend on whatever
5891 the middle-end decides to do with these vector types.
5893 The midde-end can't deal with the vector types > 16 bytes. In this
5894 case, we return the original mode and warn ABI change if CUM isn't
5897 static enum machine_mode
5898 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5900 enum machine_mode mode
= TYPE_MODE (type
);
5902 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5904 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5905 if ((size
== 8 || size
== 16 || size
== 32)
5906 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5907 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5909 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5911 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5912 mode
= MIN_MODE_VECTOR_FLOAT
;
5914 mode
= MIN_MODE_VECTOR_INT
;
5916 /* Get the mode which has this inner mode and number of units. */
5917 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5918 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5919 && GET_MODE_INNER (mode
) == innermode
)
5921 if (size
== 32 && !TARGET_AVX
)
5923 static bool warnedavx
;
5930 warning (0, "AVX vector argument without AVX "
5931 "enabled changes the ABI");
5933 return TYPE_MODE (type
);
5935 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5937 static bool warnedsse
;
5944 warning (0, "SSE vector argument without SSE "
5945 "enabled changes the ABI");
5960 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5961 this may not agree with the mode that the type system has chosen for the
5962 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5963 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5966 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5971 if (orig_mode
!= BLKmode
)
5972 tmp
= gen_rtx_REG (orig_mode
, regno
);
5975 tmp
= gen_rtx_REG (mode
, regno
);
5976 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5977 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5983 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5984 of this code is to classify each 8bytes of incoming argument by the register
5985 class and assign registers accordingly. */
5987 /* Return the union class of CLASS1 and CLASS2.
5988 See the x86-64 PS ABI for details. */
5990 static enum x86_64_reg_class
5991 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5993 /* Rule #1: If both classes are equal, this is the resulting class. */
5994 if (class1
== class2
)
5997 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5999 if (class1
== X86_64_NO_CLASS
)
6001 if (class2
== X86_64_NO_CLASS
)
6004 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6005 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6006 return X86_64_MEMORY_CLASS
;
6008 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6009 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6010 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6011 return X86_64_INTEGERSI_CLASS
;
6012 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6013 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6014 return X86_64_INTEGER_CLASS
;
6016 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6018 if (class1
== X86_64_X87_CLASS
6019 || class1
== X86_64_X87UP_CLASS
6020 || class1
== X86_64_COMPLEX_X87_CLASS
6021 || class2
== X86_64_X87_CLASS
6022 || class2
== X86_64_X87UP_CLASS
6023 || class2
== X86_64_COMPLEX_X87_CLASS
)
6024 return X86_64_MEMORY_CLASS
;
6026 /* Rule #6: Otherwise class SSE is used. */
6027 return X86_64_SSE_CLASS
;
6030 /* Classify the argument of type TYPE and mode MODE.
6031 CLASSES will be filled by the register class used to pass each word
6032 of the operand. The number of words is returned. In case the parameter
6033 should be passed in memory, 0 is returned. As a special case for zero
6034 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6036 BIT_OFFSET is used internally for handling records and specifies offset
6037 of the offset in bits modulo 256 to avoid overflow cases.
6039 See the x86-64 PS ABI for details.
6043 classify_argument (enum machine_mode mode
, const_tree type
,
6044 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6046 HOST_WIDE_INT bytes
=
6047 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6049 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6051 /* Variable sized entities are always passed/returned in memory. */
6055 if (mode
!= VOIDmode
6056 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6059 if (type
&& AGGREGATE_TYPE_P (type
))
6063 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6065 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6069 for (i
= 0; i
< words
; i
++)
6070 classes
[i
] = X86_64_NO_CLASS
;
6072 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6073 signalize memory class, so handle it as special case. */
6076 classes
[0] = X86_64_NO_CLASS
;
6080 /* Classify each field of record and merge classes. */
6081 switch (TREE_CODE (type
))
6084 /* And now merge the fields of structure. */
6085 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6087 if (TREE_CODE (field
) == FIELD_DECL
)
6091 if (TREE_TYPE (field
) == error_mark_node
)
6094 /* Bitfields are always classified as integer. Handle them
6095 early, since later code would consider them to be
6096 misaligned integers. */
6097 if (DECL_BIT_FIELD (field
))
6099 for (i
= (int_bit_position (field
)
6100 + (bit_offset
% 64)) / 8 / 8;
6101 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6102 + tree_low_cst (DECL_SIZE (field
), 0)
6105 merge_classes (X86_64_INTEGER_CLASS
,
6112 type
= TREE_TYPE (field
);
6114 /* Flexible array member is ignored. */
6115 if (TYPE_MODE (type
) == BLKmode
6116 && TREE_CODE (type
) == ARRAY_TYPE
6117 && TYPE_SIZE (type
) == NULL_TREE
6118 && TYPE_DOMAIN (type
) != NULL_TREE
6119 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6124 if (!warned
&& warn_psabi
)
6127 inform (input_location
,
6128 "the ABI of passing struct with"
6129 " a flexible array member has"
6130 " changed in GCC 4.4");
6134 num
= classify_argument (TYPE_MODE (type
), type
,
6136 (int_bit_position (field
)
6137 + bit_offset
) % 256);
6140 pos
= (int_bit_position (field
)
6141 + (bit_offset
% 64)) / 8 / 8;
6142 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6144 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6151 /* Arrays are handled as small records. */
6154 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6155 TREE_TYPE (type
), subclasses
, bit_offset
);
6159 /* The partial classes are now full classes. */
6160 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6161 subclasses
[0] = X86_64_SSE_CLASS
;
6162 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6163 && !((bit_offset
% 64) == 0 && bytes
== 4))
6164 subclasses
[0] = X86_64_INTEGER_CLASS
;
6166 for (i
= 0; i
< words
; i
++)
6167 classes
[i
] = subclasses
[i
% num
];
6172 case QUAL_UNION_TYPE
:
6173 /* Unions are similar to RECORD_TYPE but offset is always 0.
6175 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6177 if (TREE_CODE (field
) == FIELD_DECL
)
6181 if (TREE_TYPE (field
) == error_mark_node
)
6184 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6185 TREE_TYPE (field
), subclasses
,
6189 for (i
= 0; i
< num
; i
++)
6190 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6201 /* When size > 16 bytes, if the first one isn't
6202 X86_64_SSE_CLASS or any other ones aren't
6203 X86_64_SSEUP_CLASS, everything should be passed in
6205 if (classes
[0] != X86_64_SSE_CLASS
)
6208 for (i
= 1; i
< words
; i
++)
6209 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6213 /* Final merger cleanup. */
6214 for (i
= 0; i
< words
; i
++)
6216 /* If one class is MEMORY, everything should be passed in
6218 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6221 /* The X86_64_SSEUP_CLASS should be always preceded by
6222 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6223 if (classes
[i
] == X86_64_SSEUP_CLASS
6224 && classes
[i
- 1] != X86_64_SSE_CLASS
6225 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6227 /* The first one should never be X86_64_SSEUP_CLASS. */
6228 gcc_assert (i
!= 0);
6229 classes
[i
] = X86_64_SSE_CLASS
;
6232 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6233 everything should be passed in memory. */
6234 if (classes
[i
] == X86_64_X87UP_CLASS
6235 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6239 /* The first one should never be X86_64_X87UP_CLASS. */
6240 gcc_assert (i
!= 0);
6241 if (!warned
&& warn_psabi
)
6244 inform (input_location
,
6245 "the ABI of passing union with long double"
6246 " has changed in GCC 4.4");
6254 /* Compute alignment needed. We align all types to natural boundaries with
6255 exception of XFmode that is aligned to 64bits. */
6256 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6258 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6261 mode_alignment
= 128;
6262 else if (mode
== XCmode
)
6263 mode_alignment
= 256;
6264 if (COMPLEX_MODE_P (mode
))
6265 mode_alignment
/= 2;
6266 /* Misaligned fields are always returned in memory. */
6267 if (bit_offset
% mode_alignment
)
6271 /* for V1xx modes, just use the base mode */
6272 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6273 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6274 mode
= GET_MODE_INNER (mode
);
6276 /* Classification of atomic types. */
6281 classes
[0] = X86_64_SSE_CLASS
;
6284 classes
[0] = X86_64_SSE_CLASS
;
6285 classes
[1] = X86_64_SSEUP_CLASS
;
6295 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6299 classes
[0] = X86_64_INTEGERSI_CLASS
;
6302 else if (size
<= 64)
6304 classes
[0] = X86_64_INTEGER_CLASS
;
6307 else if (size
<= 64+32)
6309 classes
[0] = X86_64_INTEGER_CLASS
;
6310 classes
[1] = X86_64_INTEGERSI_CLASS
;
6313 else if (size
<= 64+64)
6315 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6323 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6327 /* OImode shouldn't be used directly. */
6332 if (!(bit_offset
% 64))
6333 classes
[0] = X86_64_SSESF_CLASS
;
6335 classes
[0] = X86_64_SSE_CLASS
;
6338 classes
[0] = X86_64_SSEDF_CLASS
;
6341 classes
[0] = X86_64_X87_CLASS
;
6342 classes
[1] = X86_64_X87UP_CLASS
;
6345 classes
[0] = X86_64_SSE_CLASS
;
6346 classes
[1] = X86_64_SSEUP_CLASS
;
6349 classes
[0] = X86_64_SSE_CLASS
;
6350 if (!(bit_offset
% 64))
6356 if (!warned
&& warn_psabi
)
6359 inform (input_location
,
6360 "the ABI of passing structure with complex float"
6361 " member has changed in GCC 4.4");
6363 classes
[1] = X86_64_SSESF_CLASS
;
6367 classes
[0] = X86_64_SSEDF_CLASS
;
6368 classes
[1] = X86_64_SSEDF_CLASS
;
6371 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6374 /* This modes is larger than 16 bytes. */
6382 classes
[0] = X86_64_SSE_CLASS
;
6383 classes
[1] = X86_64_SSEUP_CLASS
;
6384 classes
[2] = X86_64_SSEUP_CLASS
;
6385 classes
[3] = X86_64_SSEUP_CLASS
;
6393 classes
[0] = X86_64_SSE_CLASS
;
6394 classes
[1] = X86_64_SSEUP_CLASS
;
6402 classes
[0] = X86_64_SSE_CLASS
;
6408 gcc_assert (VECTOR_MODE_P (mode
));
6413 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6415 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6416 classes
[0] = X86_64_INTEGERSI_CLASS
;
6418 classes
[0] = X86_64_INTEGER_CLASS
;
6419 classes
[1] = X86_64_INTEGER_CLASS
;
6420 return 1 + (bytes
> 8);
6424 /* Examine the argument and return set number of register required in each
6425 class. Return 0 iff parameter should be passed in memory. */
6427 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6428 int *int_nregs
, int *sse_nregs
)
6430 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6431 int n
= classify_argument (mode
, type
, regclass
, 0);
6437 for (n
--; n
>= 0; n
--)
6438 switch (regclass
[n
])
6440 case X86_64_INTEGER_CLASS
:
6441 case X86_64_INTEGERSI_CLASS
:
6444 case X86_64_SSE_CLASS
:
6445 case X86_64_SSESF_CLASS
:
6446 case X86_64_SSEDF_CLASS
:
6449 case X86_64_NO_CLASS
:
6450 case X86_64_SSEUP_CLASS
:
6452 case X86_64_X87_CLASS
:
6453 case X86_64_X87UP_CLASS
:
6457 case X86_64_COMPLEX_X87_CLASS
:
6458 return in_return
? 2 : 0;
6459 case X86_64_MEMORY_CLASS
:
6465 /* Construct container for the argument used by GCC interface. See
6466 FUNCTION_ARG for the detailed description. */
6469 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6470 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6471 const int *intreg
, int sse_regno
)
6473 /* The following variables hold the static issued_error state. */
6474 static bool issued_sse_arg_error
;
6475 static bool issued_sse_ret_error
;
6476 static bool issued_x87_ret_error
;
6478 enum machine_mode tmpmode
;
6480 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6481 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6485 int needed_sseregs
, needed_intregs
;
6486 rtx exp
[MAX_CLASSES
];
6489 n
= classify_argument (mode
, type
, regclass
, 0);
6492 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6495 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6498 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6499 some less clueful developer tries to use floating-point anyway. */
6500 if (needed_sseregs
&& !TARGET_SSE
)
6504 if (!issued_sse_ret_error
)
6506 error ("SSE register return with SSE disabled");
6507 issued_sse_ret_error
= true;
6510 else if (!issued_sse_arg_error
)
6512 error ("SSE register argument with SSE disabled");
6513 issued_sse_arg_error
= true;
6518 /* Likewise, error if the ABI requires us to return values in the
6519 x87 registers and the user specified -mno-80387. */
6520 if (!TARGET_80387
&& in_return
)
6521 for (i
= 0; i
< n
; i
++)
6522 if (regclass
[i
] == X86_64_X87_CLASS
6523 || regclass
[i
] == X86_64_X87UP_CLASS
6524 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6526 if (!issued_x87_ret_error
)
6528 error ("x87 register return with x87 disabled");
6529 issued_x87_ret_error
= true;
6534 /* First construct simple cases. Avoid SCmode, since we want to use
6535 single register to pass this type. */
6536 if (n
== 1 && mode
!= SCmode
)
6537 switch (regclass
[0])
6539 case X86_64_INTEGER_CLASS
:
6540 case X86_64_INTEGERSI_CLASS
:
6541 return gen_rtx_REG (mode
, intreg
[0]);
6542 case X86_64_SSE_CLASS
:
6543 case X86_64_SSESF_CLASS
:
6544 case X86_64_SSEDF_CLASS
:
6545 if (mode
!= BLKmode
)
6546 return gen_reg_or_parallel (mode
, orig_mode
,
6547 SSE_REGNO (sse_regno
));
6549 case X86_64_X87_CLASS
:
6550 case X86_64_COMPLEX_X87_CLASS
:
6551 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6552 case X86_64_NO_CLASS
:
6553 /* Zero sized array, struct or class. */
6559 && regclass
[0] == X86_64_SSE_CLASS
6560 && regclass
[1] == X86_64_SSEUP_CLASS
6562 return gen_reg_or_parallel (mode
, orig_mode
,
6563 SSE_REGNO (sse_regno
));
6565 && regclass
[0] == X86_64_SSE_CLASS
6566 && regclass
[1] == X86_64_SSEUP_CLASS
6567 && regclass
[2] == X86_64_SSEUP_CLASS
6568 && regclass
[3] == X86_64_SSEUP_CLASS
6570 return gen_reg_or_parallel (mode
, orig_mode
,
6571 SSE_REGNO (sse_regno
));
6573 && regclass
[0] == X86_64_X87_CLASS
6574 && regclass
[1] == X86_64_X87UP_CLASS
)
6575 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6578 && regclass
[0] == X86_64_INTEGER_CLASS
6579 && regclass
[1] == X86_64_INTEGER_CLASS
6580 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6581 && intreg
[0] + 1 == intreg
[1])
6582 return gen_rtx_REG (mode
, intreg
[0]);
6584 /* Otherwise figure out the entries of the PARALLEL. */
6585 for (i
= 0; i
< n
; i
++)
6589 switch (regclass
[i
])
6591 case X86_64_NO_CLASS
:
6593 case X86_64_INTEGER_CLASS
:
6594 case X86_64_INTEGERSI_CLASS
:
6595 /* Merge TImodes on aligned occasions here too. */
6596 if (i
* 8 + 8 > bytes
)
6598 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6599 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6603 /* We've requested 24 bytes we
6604 don't have mode for. Use DImode. */
6605 if (tmpmode
== BLKmode
)
6608 = gen_rtx_EXPR_LIST (VOIDmode
,
6609 gen_rtx_REG (tmpmode
, *intreg
),
6613 case X86_64_SSESF_CLASS
:
6615 = gen_rtx_EXPR_LIST (VOIDmode
,
6616 gen_rtx_REG (SFmode
,
6617 SSE_REGNO (sse_regno
)),
6621 case X86_64_SSEDF_CLASS
:
6623 = gen_rtx_EXPR_LIST (VOIDmode
,
6624 gen_rtx_REG (DFmode
,
6625 SSE_REGNO (sse_regno
)),
6629 case X86_64_SSE_CLASS
:
6637 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6647 && regclass
[1] == X86_64_SSEUP_CLASS
6648 && regclass
[2] == X86_64_SSEUP_CLASS
6649 && regclass
[3] == X86_64_SSEUP_CLASS
);
6657 = gen_rtx_EXPR_LIST (VOIDmode
,
6658 gen_rtx_REG (tmpmode
,
6659 SSE_REGNO (sse_regno
)),
6668 /* Empty aligned struct, union or class. */
6672 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6673 for (i
= 0; i
< nexps
; i
++)
6674 XVECEXP (ret
, 0, i
) = exp
[i
];
6678 /* Update the data in CUM to advance over an argument of mode MODE
6679 and data type TYPE. (TYPE is null for libcalls where that information
6680 may not be available.) */
6683 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6684 const_tree type
, HOST_WIDE_INT bytes
,
6685 HOST_WIDE_INT words
)
6701 cum
->words
+= words
;
6702 cum
->nregs
-= words
;
6703 cum
->regno
+= words
;
6705 if (cum
->nregs
<= 0)
6713 /* OImode shouldn't be used directly. */
6717 if (cum
->float_in_sse
< 2)
6720 if (cum
->float_in_sse
< 1)
6737 if (!type
|| !AGGREGATE_TYPE_P (type
))
6739 cum
->sse_words
+= words
;
6740 cum
->sse_nregs
-= 1;
6741 cum
->sse_regno
+= 1;
6742 if (cum
->sse_nregs
<= 0)
6756 if (!type
|| !AGGREGATE_TYPE_P (type
))
6758 cum
->mmx_words
+= words
;
6759 cum
->mmx_nregs
-= 1;
6760 cum
->mmx_regno
+= 1;
6761 if (cum
->mmx_nregs
<= 0)
6772 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6773 const_tree type
, HOST_WIDE_INT words
, bool named
)
6775 int int_nregs
, sse_nregs
;
6777 /* Unnamed 256bit vector mode parameters are passed on stack. */
6778 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6781 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6782 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6784 cum
->nregs
-= int_nregs
;
6785 cum
->sse_nregs
-= sse_nregs
;
6786 cum
->regno
+= int_nregs
;
6787 cum
->sse_regno
+= sse_nregs
;
6791 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6792 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6793 cum
->words
+= words
;
6798 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6799 HOST_WIDE_INT words
)
6801 /* Otherwise, this should be passed indirect. */
6802 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6804 cum
->words
+= words
;
6812 /* Update the data in CUM to advance over an argument of mode MODE and
6813 data type TYPE. (TYPE is null for libcalls where that information
6814 may not be available.) */
6817 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6818 const_tree type
, bool named
)
6820 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6821 HOST_WIDE_INT bytes
, words
;
6823 if (mode
== BLKmode
)
6824 bytes
= int_size_in_bytes (type
);
6826 bytes
= GET_MODE_SIZE (mode
);
6827 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6830 mode
= type_natural_mode (type
, NULL
);
6832 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6833 function_arg_advance_ms_64 (cum
, bytes
, words
);
6834 else if (TARGET_64BIT
)
6835 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6837 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6840 /* Define where to put the arguments to a function.
6841 Value is zero to push the argument on the stack,
6842 or a hard register in which to store the argument.
6844 MODE is the argument's machine mode.
6845 TYPE is the data type of the argument (as a tree).
6846 This is null for libcalls where that information may
6848 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6849 the preceding args and about the function being called.
6850 NAMED is nonzero if this argument is a named parameter
6851 (otherwise it is an extra parameter matching an ellipsis). */
6854 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6855 enum machine_mode orig_mode
, const_tree type
,
6856 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6858 static bool warnedsse
, warnedmmx
;
6860 /* Avoid the AL settings for the Unix64 ABI. */
6861 if (mode
== VOIDmode
)
6877 if (words
<= cum
->nregs
)
6879 int regno
= cum
->regno
;
6881 /* Fastcall allocates the first two DWORD (SImode) or
6882 smaller arguments to ECX and EDX if it isn't an
6888 || (type
&& AGGREGATE_TYPE_P (type
)))
6891 /* ECX not EAX is the first allocated register. */
6892 if (regno
== AX_REG
)
6895 return gen_rtx_REG (mode
, regno
);
6900 if (cum
->float_in_sse
< 2)
6903 if (cum
->float_in_sse
< 1)
6907 /* In 32bit, we pass TImode in xmm registers. */
6914 if (!type
|| !AGGREGATE_TYPE_P (type
))
6916 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6919 warning (0, "SSE vector argument without SSE enabled "
6923 return gen_reg_or_parallel (mode
, orig_mode
,
6924 cum
->sse_regno
+ FIRST_SSE_REG
);
6929 /* OImode shouldn't be used directly. */
6938 if (!type
|| !AGGREGATE_TYPE_P (type
))
6941 return gen_reg_or_parallel (mode
, orig_mode
,
6942 cum
->sse_regno
+ FIRST_SSE_REG
);
6952 if (!type
|| !AGGREGATE_TYPE_P (type
))
6954 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6957 warning (0, "MMX vector argument without MMX enabled "
6961 return gen_reg_or_parallel (mode
, orig_mode
,
6962 cum
->mmx_regno
+ FIRST_MMX_REG
);
6971 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6972 enum machine_mode orig_mode
, const_tree type
, bool named
)
6974 /* Handle a hidden AL argument containing number of registers
6975 for varargs x86-64 functions. */
6976 if (mode
== VOIDmode
)
6977 return GEN_INT (cum
->maybe_vaarg
6978 ? (cum
->sse_nregs
< 0
6979 ? X86_64_SSE_REGPARM_MAX
6994 /* Unnamed 256bit vector mode parameters are passed on stack. */
7000 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7002 &x86_64_int_parameter_registers
[cum
->regno
],
7007 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7008 enum machine_mode orig_mode
, bool named
,
7009 HOST_WIDE_INT bytes
)
7013 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7014 We use value of -2 to specify that current function call is MSABI. */
7015 if (mode
== VOIDmode
)
7016 return GEN_INT (-2);
7018 /* If we've run out of registers, it goes on the stack. */
7019 if (cum
->nregs
== 0)
7022 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7024 /* Only floating point modes are passed in anything but integer regs. */
7025 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7028 regno
= cum
->regno
+ FIRST_SSE_REG
;
7033 /* Unnamed floating parameters are passed in both the
7034 SSE and integer registers. */
7035 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7036 t2
= gen_rtx_REG (mode
, regno
);
7037 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7038 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7039 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7042 /* Handle aggregated types passed in register. */
7043 if (orig_mode
== BLKmode
)
7045 if (bytes
> 0 && bytes
<= 8)
7046 mode
= (bytes
> 4 ? DImode
: SImode
);
7047 if (mode
== BLKmode
)
7051 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7054 /* Return where to put the arguments to a function.
7055 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7057 MODE is the argument's machine mode. TYPE is the data type of the
7058 argument. It is null for libcalls where that information may not be
7059 available. CUM gives information about the preceding args and about
7060 the function being called. NAMED is nonzero if this argument is a
7061 named parameter (otherwise it is an extra parameter matching an
7065 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7066 const_tree type
, bool named
)
7068 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7069 enum machine_mode mode
= omode
;
7070 HOST_WIDE_INT bytes
, words
;
7073 if (mode
== BLKmode
)
7074 bytes
= int_size_in_bytes (type
);
7076 bytes
= GET_MODE_SIZE (mode
);
7077 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7079 /* To simplify the code below, represent vector types with a vector mode
7080 even if MMX/SSE are not active. */
7081 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7082 mode
= type_natural_mode (type
, cum
);
7084 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7085 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7086 else if (TARGET_64BIT
)
7087 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7089 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7091 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
7093 /* This argument uses 256bit AVX modes. */
7095 cfun
->machine
->callee_pass_avx256_p
= true;
7097 cfun
->machine
->caller_pass_avx256_p
= true;
7103 /* A C expression that indicates when an argument must be passed by
7104 reference. If nonzero for an argument, a copy of that argument is
7105 made in memory and a pointer to the argument is passed instead of
7106 the argument itself. The pointer is passed in whatever way is
7107 appropriate for passing a pointer to that type. */
7110 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
7111 enum machine_mode mode ATTRIBUTE_UNUSED
,
7112 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7114 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7116 /* See Windows x64 Software Convention. */
7117 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7119 int msize
= (int) GET_MODE_SIZE (mode
);
7122 /* Arrays are passed by reference. */
7123 if (TREE_CODE (type
) == ARRAY_TYPE
)
7126 if (AGGREGATE_TYPE_P (type
))
7128 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7129 are passed by reference. */
7130 msize
= int_size_in_bytes (type
);
7134 /* __m128 is passed by reference. */
7136 case 1: case 2: case 4: case 8:
7142 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7148 /* Return true when TYPE should be 128bit aligned for 32bit argument
7149 passing ABI. XXX: This function is obsolete and is only used for
7150 checking psABI compatibility with previous versions of GCC. */
7153 ix86_compat_aligned_value_p (const_tree type
)
7155 enum machine_mode mode
= TYPE_MODE (type
);
7156 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7160 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7162 if (TYPE_ALIGN (type
) < 128)
7165 if (AGGREGATE_TYPE_P (type
))
7167 /* Walk the aggregates recursively. */
7168 switch (TREE_CODE (type
))
7172 case QUAL_UNION_TYPE
:
7176 /* Walk all the structure fields. */
7177 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7179 if (TREE_CODE (field
) == FIELD_DECL
7180 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7187 /* Just for use if some languages passes arrays by value. */
7188 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7199 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7200 XXX: This function is obsolete and is only used for checking psABI
7201 compatibility with previous versions of GCC. */
7204 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7205 const_tree type
, unsigned int align
)
7207 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7208 natural boundaries. */
7209 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7211 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7212 make an exception for SSE modes since these require 128bit
7215 The handling here differs from field_alignment. ICC aligns MMX
7216 arguments to 4 byte boundaries, while structure fields are aligned
7217 to 8 byte boundaries. */
7220 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7221 align
= PARM_BOUNDARY
;
7225 if (!ix86_compat_aligned_value_p (type
))
7226 align
= PARM_BOUNDARY
;
7229 if (align
> BIGGEST_ALIGNMENT
)
7230 align
= BIGGEST_ALIGNMENT
;
7234 /* Return true when TYPE should be 128bit aligned for 32bit argument
7238 ix86_contains_aligned_value_p (const_tree type
)
7240 enum machine_mode mode
= TYPE_MODE (type
);
7242 if (mode
== XFmode
|| mode
== XCmode
)
7245 if (TYPE_ALIGN (type
) < 128)
7248 if (AGGREGATE_TYPE_P (type
))
7250 /* Walk the aggregates recursively. */
7251 switch (TREE_CODE (type
))
7255 case QUAL_UNION_TYPE
:
7259 /* Walk all the structure fields. */
7260 for (field
= TYPE_FIELDS (type
);
7262 field
= DECL_CHAIN (field
))
7264 if (TREE_CODE (field
) == FIELD_DECL
7265 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7272 /* Just for use if some languages passes arrays by value. */
7273 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7282 return TYPE_ALIGN (type
) >= 128;
7287 /* Gives the alignment boundary, in bits, of an argument with the
7288 specified mode and type. */
7291 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7296 /* Since the main variant type is used for call, we convert it to
7297 the main variant type. */
7298 type
= TYPE_MAIN_VARIANT (type
);
7299 align
= TYPE_ALIGN (type
);
7302 align
= GET_MODE_ALIGNMENT (mode
);
7303 if (align
< PARM_BOUNDARY
)
7304 align
= PARM_BOUNDARY
;
7308 unsigned int saved_align
= align
;
7312 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7315 if (mode
== XFmode
|| mode
== XCmode
)
7316 align
= PARM_BOUNDARY
;
7318 else if (!ix86_contains_aligned_value_p (type
))
7319 align
= PARM_BOUNDARY
;
7322 align
= PARM_BOUNDARY
;
7327 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7331 inform (input_location
,
7332 "The ABI for passing parameters with %d-byte"
7333 " alignment has changed in GCC 4.6",
7334 align
/ BITS_PER_UNIT
);
7341 /* Return true if N is a possible register number of function value. */
7344 ix86_function_value_regno_p (const unsigned int regno
)
7351 case FIRST_FLOAT_REG
:
7352 /* TODO: The function should depend on current function ABI but
7353 builtins.c would need updating then. Therefore we use the
7355 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7357 return TARGET_FLOAT_RETURNS_IN_80387
;
7363 if (TARGET_MACHO
|| TARGET_64BIT
)
7371 /* Define how to find the value returned by a function.
7372 VALTYPE is the data type of the value (as a tree).
7373 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7374 otherwise, FUNC is 0. */
7377 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7378 const_tree fntype
, const_tree fn
)
7382 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7383 we normally prevent this case when mmx is not available. However
7384 some ABIs may require the result to be returned like DImode. */
7385 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7386 regno
= FIRST_MMX_REG
;
7388 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7389 we prevent this case when sse is not available. However some ABIs
7390 may require the result to be returned like integer TImode. */
7391 else if (mode
== TImode
7392 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7393 regno
= FIRST_SSE_REG
;
7395 /* 32-byte vector modes in %ymm0. */
7396 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7397 regno
= FIRST_SSE_REG
;
7399 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7400 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7401 regno
= FIRST_FLOAT_REG
;
7403 /* Most things go in %eax. */
7406 /* Override FP return register with %xmm0 for local functions when
7407 SSE math is enabled or for functions with sseregparm attribute. */
7408 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7410 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7411 if ((sse_level
>= 1 && mode
== SFmode
)
7412 || (sse_level
== 2 && mode
== DFmode
))
7413 regno
= FIRST_SSE_REG
;
7416 /* OImode shouldn't be used directly. */
7417 gcc_assert (mode
!= OImode
);
7419 return gen_rtx_REG (orig_mode
, regno
);
7423 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7428 /* Handle libcalls, which don't provide a type node. */
7429 if (valtype
== NULL
)
7443 regno
= FIRST_SSE_REG
;
7447 regno
= FIRST_FLOAT_REG
;
7455 return gen_rtx_REG (mode
, regno
);
7457 else if (POINTER_TYPE_P (valtype
))
7459 /* Pointers are always returned in word_mode. */
7463 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7464 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7465 x86_64_int_return_registers
, 0);
7467 /* For zero sized structures, construct_container returns NULL, but we
7468 need to keep rest of compiler happy by returning meaningful value. */
7470 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7476 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7478 unsigned int regno
= AX_REG
;
7482 switch (GET_MODE_SIZE (mode
))
7485 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7486 && !COMPLEX_MODE_P (mode
))
7487 regno
= FIRST_SSE_REG
;
7491 if (mode
== SFmode
|| mode
== DFmode
)
7492 regno
= FIRST_SSE_REG
;
7498 return gen_rtx_REG (orig_mode
, regno
);
7502 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7503 enum machine_mode orig_mode
, enum machine_mode mode
)
7505 const_tree fn
, fntype
;
7508 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7509 fn
= fntype_or_decl
;
7510 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7512 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7513 return function_value_ms_64 (orig_mode
, mode
);
7514 else if (TARGET_64BIT
)
7515 return function_value_64 (orig_mode
, mode
, valtype
);
7517 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7521 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7522 bool outgoing ATTRIBUTE_UNUSED
)
7524 enum machine_mode mode
, orig_mode
;
7526 orig_mode
= TYPE_MODE (valtype
);
7527 mode
= type_natural_mode (valtype
, NULL
);
7528 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7531 /* Pointer function arguments and return values are promoted to
7534 static enum machine_mode
7535 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7536 int *punsignedp
, const_tree fntype
,
7539 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7541 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7544 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7548 /* Return true if a structure, union or array with MODE containing FIELD
7549 should be accessed using BLKmode. */
7552 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7554 /* Union with XFmode must be in BLKmode. */
7555 return (mode
== XFmode
7556 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7557 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7561 ix86_libcall_value (enum machine_mode mode
)
7563 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7566 /* Return true iff type is returned in memory. */
7568 static bool ATTRIBUTE_UNUSED
7569 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7573 if (mode
== BLKmode
)
7576 size
= int_size_in_bytes (type
);
7578 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7581 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7583 /* User-created vectors small enough to fit in EAX. */
7587 /* MMX/3dNow values are returned in MM0,
7588 except when it doesn't exits or the ABI prescribes otherwise. */
7590 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7592 /* SSE values are returned in XMM0, except when it doesn't exist. */
7596 /* AVX values are returned in YMM0, except when it doesn't exist. */
7607 /* OImode shouldn't be used directly. */
7608 gcc_assert (mode
!= OImode
);
7613 static bool ATTRIBUTE_UNUSED
7614 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7616 int needed_intregs
, needed_sseregs
;
7617 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7620 static bool ATTRIBUTE_UNUSED
7621 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7623 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7625 /* __m128 is returned in xmm0. */
7626 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7627 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7630 /* Otherwise, the size must be exactly in [1248]. */
7631 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7635 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7637 #ifdef SUBTARGET_RETURN_IN_MEMORY
7638 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7640 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7644 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7645 return return_in_memory_ms_64 (type
, mode
);
7647 return return_in_memory_64 (type
, mode
);
7650 return return_in_memory_32 (type
, mode
);
7654 /* When returning SSE vector types, we have a choice of either
7655 (1) being abi incompatible with a -march switch, or
7656 (2) generating an error.
7657 Given no good solution, I think the safest thing is one warning.
7658 The user won't be able to use -Werror, but....
7660 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7661 called in response to actually generating a caller or callee that
7662 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7663 via aggregate_value_p for general type probing from tree-ssa. */
7666 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7668 static bool warnedsse
, warnedmmx
;
7670 if (!TARGET_64BIT
&& type
)
7672 /* Look at the return type of the function, not the function type. */
7673 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7675 if (!TARGET_SSE
&& !warnedsse
)
7678 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7681 warning (0, "SSE vector return without SSE enabled "
7686 if (!TARGET_MMX
&& !warnedmmx
)
7688 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7691 warning (0, "MMX vector return without MMX enabled "
7701 /* Create the va_list data type. */
7703 /* Returns the calling convention specific va_list date type.
7704 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7707 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7709 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7711 /* For i386 we use plain pointer to argument area. */
7712 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7713 return build_pointer_type (char_type_node
);
7715 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7716 type_decl
= build_decl (BUILTINS_LOCATION
,
7717 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7719 f_gpr
= build_decl (BUILTINS_LOCATION
,
7720 FIELD_DECL
, get_identifier ("gp_offset"),
7721 unsigned_type_node
);
7722 f_fpr
= build_decl (BUILTINS_LOCATION
,
7723 FIELD_DECL
, get_identifier ("fp_offset"),
7724 unsigned_type_node
);
7725 f_ovf
= build_decl (BUILTINS_LOCATION
,
7726 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7728 f_sav
= build_decl (BUILTINS_LOCATION
,
7729 FIELD_DECL
, get_identifier ("reg_save_area"),
7732 va_list_gpr_counter_field
= f_gpr
;
7733 va_list_fpr_counter_field
= f_fpr
;
7735 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7736 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7737 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7738 DECL_FIELD_CONTEXT (f_sav
) = record
;
7740 TYPE_STUB_DECL (record
) = type_decl
;
7741 TYPE_NAME (record
) = type_decl
;
7742 TYPE_FIELDS (record
) = f_gpr
;
7743 DECL_CHAIN (f_gpr
) = f_fpr
;
7744 DECL_CHAIN (f_fpr
) = f_ovf
;
7745 DECL_CHAIN (f_ovf
) = f_sav
;
7747 layout_type (record
);
7749 /* The correct type is an array type of one element. */
7750 return build_array_type (record
, build_index_type (size_zero_node
));
7753 /* Setup the builtin va_list data type and for 64-bit the additional
7754 calling convention specific va_list data types. */
7757 ix86_build_builtin_va_list (void)
7759 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7761 /* Initialize abi specific va_list builtin types. */
7765 if (ix86_abi
== MS_ABI
)
7767 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7768 if (TREE_CODE (t
) != RECORD_TYPE
)
7769 t
= build_variant_type_copy (t
);
7770 sysv_va_list_type_node
= t
;
7775 if (TREE_CODE (t
) != RECORD_TYPE
)
7776 t
= build_variant_type_copy (t
);
7777 sysv_va_list_type_node
= t
;
7779 if (ix86_abi
!= MS_ABI
)
7781 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7782 if (TREE_CODE (t
) != RECORD_TYPE
)
7783 t
= build_variant_type_copy (t
);
7784 ms_va_list_type_node
= t
;
7789 if (TREE_CODE (t
) != RECORD_TYPE
)
7790 t
= build_variant_type_copy (t
);
7791 ms_va_list_type_node
= t
;
7798 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7801 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7807 /* GPR size of varargs save area. */
7808 if (cfun
->va_list_gpr_size
)
7809 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7811 ix86_varargs_gpr_size
= 0;
7813 /* FPR size of varargs save area. We don't need it if we don't pass
7814 anything in SSE registers. */
7815 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7816 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7818 ix86_varargs_fpr_size
= 0;
7820 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7823 save_area
= frame_pointer_rtx
;
7824 set
= get_varargs_alias_set ();
7826 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7827 if (max
> X86_64_REGPARM_MAX
)
7828 max
= X86_64_REGPARM_MAX
;
7830 for (i
= cum
->regno
; i
< max
; i
++)
7832 mem
= gen_rtx_MEM (word_mode
,
7833 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7834 MEM_NOTRAP_P (mem
) = 1;
7835 set_mem_alias_set (mem
, set
);
7836 emit_move_insn (mem
,
7837 gen_rtx_REG (word_mode
,
7838 x86_64_int_parameter_registers
[i
]));
7841 if (ix86_varargs_fpr_size
)
7843 enum machine_mode smode
;
7846 /* Now emit code to save SSE registers. The AX parameter contains number
7847 of SSE parameter registers used to call this function, though all we
7848 actually check here is the zero/non-zero status. */
7850 label
= gen_label_rtx ();
7851 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7852 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7855 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7856 we used movdqa (i.e. TImode) instead? Perhaps even better would
7857 be if we could determine the real mode of the data, via a hook
7858 into pass_stdarg. Ignore all that for now. */
7860 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7861 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7863 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7864 if (max
> X86_64_SSE_REGPARM_MAX
)
7865 max
= X86_64_SSE_REGPARM_MAX
;
7867 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7869 mem
= plus_constant (Pmode
, save_area
,
7870 i
* 16 + ix86_varargs_gpr_size
);
7871 mem
= gen_rtx_MEM (smode
, mem
);
7872 MEM_NOTRAP_P (mem
) = 1;
7873 set_mem_alias_set (mem
, set
);
7874 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7876 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7884 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7886 alias_set_type set
= get_varargs_alias_set ();
7889 /* Reset to zero, as there might be a sysv vaarg used
7891 ix86_varargs_gpr_size
= 0;
7892 ix86_varargs_fpr_size
= 0;
7894 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7898 mem
= gen_rtx_MEM (Pmode
,
7899 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7900 i
* UNITS_PER_WORD
));
7901 MEM_NOTRAP_P (mem
) = 1;
7902 set_mem_alias_set (mem
, set
);
7904 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7905 emit_move_insn (mem
, reg
);
7910 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7911 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7914 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7915 CUMULATIVE_ARGS next_cum
;
7918 /* This argument doesn't appear to be used anymore. Which is good,
7919 because the old code here didn't suppress rtl generation. */
7920 gcc_assert (!no_rtl
);
7925 fntype
= TREE_TYPE (current_function_decl
);
7927 /* For varargs, we do not want to skip the dummy va_dcl argument.
7928 For stdargs, we do want to skip the last named argument. */
7930 if (stdarg_p (fntype
))
7931 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7934 if (cum
->call_abi
== MS_ABI
)
7935 setup_incoming_varargs_ms_64 (&next_cum
);
7937 setup_incoming_varargs_64 (&next_cum
);
7940 /* Checks if TYPE is of kind va_list char *. */
7943 is_va_list_char_pointer (tree type
)
7947 /* For 32-bit it is always true. */
7950 canonic
= ix86_canonical_va_list_type (type
);
7951 return (canonic
== ms_va_list_type_node
7952 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7955 /* Implement va_start. */
7958 ix86_va_start (tree valist
, rtx nextarg
)
7960 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7961 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7962 tree gpr
, fpr
, ovf
, sav
, t
;
7966 if (flag_split_stack
7967 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7969 unsigned int scratch_regno
;
7971 /* When we are splitting the stack, we can't refer to the stack
7972 arguments using internal_arg_pointer, because they may be on
7973 the old stack. The split stack prologue will arrange to
7974 leave a pointer to the old stack arguments in a scratch
7975 register, which we here copy to a pseudo-register. The split
7976 stack prologue can't set the pseudo-register directly because
7977 it (the prologue) runs before any registers have been saved. */
7979 scratch_regno
= split_stack_prologue_scratch_regno ();
7980 if (scratch_regno
!= INVALID_REGNUM
)
7984 reg
= gen_reg_rtx (Pmode
);
7985 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7988 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7992 push_topmost_sequence ();
7993 emit_insn_after (seq
, entry_of_function ());
7994 pop_topmost_sequence ();
7998 /* Only 64bit target needs something special. */
7999 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8001 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8002 std_expand_builtin_va_start (valist
, nextarg
);
8007 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8008 next
= expand_binop (ptr_mode
, add_optab
,
8009 cfun
->machine
->split_stack_varargs_pointer
,
8010 crtl
->args
.arg_offset_rtx
,
8011 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8012 convert_move (va_r
, next
, 0);
8017 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8018 f_fpr
= DECL_CHAIN (f_gpr
);
8019 f_ovf
= DECL_CHAIN (f_fpr
);
8020 f_sav
= DECL_CHAIN (f_ovf
);
8022 valist
= build_simple_mem_ref (valist
);
8023 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8024 /* The following should be folded into the MEM_REF offset. */
8025 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8027 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8029 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8031 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8034 /* Count number of gp and fp argument registers used. */
8035 words
= crtl
->args
.info
.words
;
8036 n_gpr
= crtl
->args
.info
.regno
;
8037 n_fpr
= crtl
->args
.info
.sse_regno
;
8039 if (cfun
->va_list_gpr_size
)
8041 type
= TREE_TYPE (gpr
);
8042 t
= build2 (MODIFY_EXPR
, type
,
8043 gpr
, build_int_cst (type
, n_gpr
* 8));
8044 TREE_SIDE_EFFECTS (t
) = 1;
8045 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8048 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8050 type
= TREE_TYPE (fpr
);
8051 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8052 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8053 TREE_SIDE_EFFECTS (t
) = 1;
8054 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8057 /* Find the overflow area. */
8058 type
= TREE_TYPE (ovf
);
8059 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8060 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8062 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8063 t
= make_tree (type
, ovf_rtx
);
8065 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8066 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8067 TREE_SIDE_EFFECTS (t
) = 1;
8068 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8070 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8072 /* Find the register save area.
8073 Prologue of the function save it right above stack frame. */
8074 type
= TREE_TYPE (sav
);
8075 t
= make_tree (type
, frame_pointer_rtx
);
8076 if (!ix86_varargs_gpr_size
)
8077 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8078 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8079 TREE_SIDE_EFFECTS (t
) = 1;
8080 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8084 /* Implement va_arg. */
/* NOTE(review): this chunk is a damaged extraction.  The function header's
   return type, braces, several declarations and else-branches were dropped
   by the extractor (gaps in the embedded original line numbers).  The
   surviving fragments are kept byte-for-byte below and only annotated;
   reconstruct against the upstream file before editing logic.  */
8087 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8090 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8091 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8092 tree gpr
, fpr
, ovf
, sav
, t
;
8094 tree lab_false
, lab_over
= NULL_TREE
;
8099 enum machine_mode nat_mode
;
8100 unsigned int arg_boundary
;
8102 /* Only 64bit target needs something special. */
8103 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8104 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
/* Walk the four fields of the SysV __va_list_tag in declaration order:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
8106 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8107 f_fpr
= DECL_CHAIN (f_gpr
);
8108 f_ovf
= DECL_CHAIN (f_fpr
);
8109 f_sav
= DECL_CHAIN (f_ovf
);
8111 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8112 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8113 valist
= build_va_arg_indirect_ref (valist
);
8114 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8115 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8116 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
/* Arguments passed by reference are fetched through a pointer.  */
8118 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8120 type
= build_pointer_type (type
);
8121 size
= int_size_in_bytes (type
);
8122 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8124 nat_mode
= type_natural_mode (type
, NULL
);
8133 /* Unnamed 256bit vector mode parameters are passed on stack. */
8134 if (!TARGET_64BIT_MS_ABI
)
8141 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8142 type
, 0, X86_64_REGPARM_MAX
,
8143 X86_64_SSE_REGPARM_MAX
, intreg
,
8148 /* Pull the value out of the saved registers. */
8150 addr
= create_tmp_var (ptr_type_node
, "addr");
/* Register-passed case: emit the fast path that reads the value out of
   the register save area, falling through to lab_false when the
   remaining register counters don't fit.  */
8154 int needed_intregs
, needed_sseregs
;
8156 tree int_addr
, sse_addr
;
8158 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8159 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8161 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8163 need_temp
= (!REG_P (container
)
8164 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8165 || TYPE_ALIGN (type
) > 128));
8167 /* In case we are passing structure, verify that it is consecutive block
8168 on the register save area. If not we need to do moves. */
8169 if (!need_temp
&& !REG_P (container
))
8171 /* Verify that all registers are strictly consecutive */
8172 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8176 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8178 rtx slot
= XVECEXP (container
, 0, i
);
8179 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8180 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8188 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8190 rtx slot
= XVECEXP (container
, 0, i
);
8191 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8192 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8204 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8205 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8208 /* First ensure that we fit completely in registers. */
/* gp_offset check: branch to lab_false (stack path) when the remaining
   GP register area cannot hold needed_intregs more 8-byte slots.  */
8211 t
= build_int_cst (TREE_TYPE (gpr
),
8212 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8213 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8214 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8215 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8216 gimplify_and_add (t
, pre_p
);
/* fp_offset check: same, for 16-byte SSE slots above the GP area.  */
8220 t
= build_int_cst (TREE_TYPE (fpr
),
8221 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8222 + X86_64_REGPARM_MAX
* 8);
8223 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8224 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8225 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8226 gimplify_and_add (t
, pre_p
);
8229 /* Compute index to start of area used for integer regs. */
8232 /* int_addr = gpr + sav; */
8233 t
= fold_build_pointer_plus (sav
, gpr
);
8234 gimplify_assign (int_addr
, t
, pre_p
);
8238 /* sse_addr = fpr + sav; */
8239 t
= fold_build_pointer_plus (sav
, fpr
);
8240 gimplify_assign (sse_addr
, t
, pre_p
);
/* need_temp path: the value is scattered across registers, so copy each
   piece into a stack temporary and return its address.  */
8244 int i
, prev_size
= 0;
8245 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8248 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8249 gimplify_assign (addr
, t
, pre_p
);
8251 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8253 rtx slot
= XVECEXP (container
, 0, i
);
8254 rtx reg
= XEXP (slot
, 0);
8255 enum machine_mode mode
= GET_MODE (reg
);
8261 tree dest_addr
, dest
;
8262 int cur_size
= GET_MODE_SIZE (mode
);
8264 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8265 prev_size
= INTVAL (XEXP (slot
, 1));
/* Clamp the final piece so we never copy past the object's size.  */
8266 if (prev_size
+ cur_size
> size
)
8268 cur_size
= size
- prev_size
;
8269 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8270 if (mode
== BLKmode
)
8273 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8274 if (mode
== GET_MODE (reg
))
8275 addr_type
= build_pointer_type (piece_type
);
8277 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8279 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
/* SSE registers sit at 16-byte strides in the save area; integer
   registers at 8-byte strides.  */
8282 if (SSE_REGNO_P (REGNO (reg
)))
8284 src_addr
= sse_addr
;
8285 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8289 src_addr
= int_addr
;
8290 src_offset
= REGNO (reg
) * 8;
8292 src_addr
= fold_convert (addr_type
, src_addr
);
8293 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8295 dest_addr
= fold_convert (daddr_type
, addr
);
8296 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8297 if (cur_size
== GET_MODE_SIZE (mode
))
8299 src
= build_va_arg_indirect_ref (src_addr
);
8300 dest
= build_va_arg_indirect_ref (dest_addr
);
8302 gimplify_assign (dest
, src
, pre_p
);
/* Partial piece: fall back to a memcpy of cur_size bytes.  */
8307 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8308 3, dest_addr
, src_addr
,
8309 size_int (cur_size
));
8310 gimplify_and_add (copy
, pre_p
);
8312 prev_size
+= cur_size
;
/* Bump the register counters past what this argument consumed.  */
8318 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8319 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8320 gimplify_assign (gpr
, t
, pre_p
);
8325 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8326 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8327 gimplify_assign (fpr
, t
, pre_p
);
8330 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8332 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8335 /* ... otherwise out of the overflow area. */
8337 /* When we align parameter on stack for caller, if the parameter
8338 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8339 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8340 here with caller. */
8341 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8342 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8343 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8345 /* Care for on-stack alignment if needed. */
8346 if (arg_boundary
<= 64 || size
== 0)
/* Round ovf up to the argument boundary: t = (ovf + align-1) & -align.  */
8350 HOST_WIDE_INT align
= arg_boundary
/ 8;
8351 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8352 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8353 build_int_cst (TREE_TYPE (t
), -align
));
8356 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8357 gimplify_assign (addr
, t
, pre_p
);
/* Advance the overflow pointer past this argument.  */
8359 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8360 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8363 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8365 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8366 addr
= fold_convert (ptrtype
, addr
);
/* For by-reference arguments, dereference one extra level.  */
8369 addr
= build_va_arg_indirect_ref (addr
);
8370 return build_va_arg_indirect_ref (addr
);
8373 /* Return true if OPNUM's MEM should be matched
8374 in movabs* patterns. */
8377 ix86_check_movabs (rtx insn
, int opnum
)
8381 set
= PATTERN (insn
);
8382 if (GET_CODE (set
) == PARALLEL
)
8383 set
= XVECEXP (set
, 0, 0);
8384 gcc_assert (GET_CODE (set
) == SET
);
8385 mem
= XEXP (set
, opnum
);
8386 while (GET_CODE (mem
) == SUBREG
)
8387 mem
= SUBREG_REG (mem
);
8388 gcc_assert (MEM_P (mem
));
8389 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8392 /* Initialize the table of extra 80387 mathematical constants. */
8395 init_ext_80387_constants (void)
8397 static const char * cst
[5] =
8399 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8400 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8401 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8402 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8403 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8407 for (i
= 0; i
< 5; i
++)
8409 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8410 /* Ensure each constant is rounded to XFmode precision. */
8411 real_convert (&ext_80387_constants_table
[i
],
8412 XFmode
, &ext_80387_constants_table
[i
]);
8415 ext_80387_constants_init
= 1;
8418 /* Return non-zero if the constant is something that
8419 can be loaded with a special instruction. */
8422 standard_80387_constant_p (rtx x
)
8424 enum machine_mode mode
= GET_MODE (x
);
8428 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8431 if (x
== CONST0_RTX (mode
))
8433 if (x
== CONST1_RTX (mode
))
8436 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8438 /* For XFmode constants, try to find a special 80387 instruction when
8439 optimizing for size or on those CPUs that benefit from them. */
8441 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8445 if (! ext_80387_constants_init
)
8446 init_ext_80387_constants ();
8448 for (i
= 0; i
< 5; i
++)
8449 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8453 /* Load of the constant -0.0 or -1.0 will be split as
8454 fldz;fchs or fld1;fchs sequence. */
8455 if (real_isnegzero (&r
))
8457 if (real_identical (&r
, &dconstm1
))
8463 /* Return the opcode of the special instruction to be used to load
8467 standard_80387_constant_opcode (rtx x
)
8469 switch (standard_80387_constant_p (x
))
8493 /* Return the CONST_DOUBLE representing the 80387 constant that is
8494 loaded by the specified special instruction. The argument IDX
8495 matches the return value from standard_80387_constant_p. */
8498 standard_80387_constant_rtx (int idx
)
8502 if (! ext_80387_constants_init
)
8503 init_ext_80387_constants ();
8519 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8523 /* Return 1 if X is all 0s and 2 if x is all 1s
8524 in supported SSE/AVX vector mode. */
/* NOTE(review): the extraction dropped this function's return statements
   and the per-mode switch that gates the all-ones case; only the guards
   below survive.  Restore from the upstream file before editing.  */
8527 standard_sse_constant_p (rtx x
)
8529 enum machine_mode mode
= GET_MODE (x
);
/* All-zero vectors can be materialized with a register xor.  */
8531 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
/* All-ones vectors can be materialized with pcmpeqd-style compares.  */
8533 if (vector_all_ones_operand (x
, mode
))
8555 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): damaged extraction -- the case labels, the end of the
   header comment and the closing braces were dropped.  The surviving
   fragments select the xor/pcmpeq template by constant kind (from
   standard_sse_constant_p) and by the insn's mode attribute.  */
8559 standard_sse_constant_opcode (rtx insn
, rtx x
)
8561 switch (standard_sse_constant_p (x
))
/* Constant kind 1 (all zeros): pick an SSE/AVX xor of matching mode.  */
8564 switch (get_attr_mode (insn
))
8567 return "%vpxor\t%0, %d0";
8569 return "%vxorpd\t%0, %d0";
8571 return "%vxorps\t%0, %d0";
/* 256-bit variants use the three-operand VEX forms.  */
8574 return "vpxor\t%x0, %x0, %x0";
8576 return "vxorpd\t%x0, %x0, %x0";
8578 return "vxorps\t%x0, %x0, %x0";
/* Constant kind 2 (all ones): compare-equal against self.  */
8586 return "vpcmpeqd\t%0, %0, %0";
8588 return "pcmpeqd\t%0, %0";
8596 /* Returns true if OP contains a symbol reference */
8599 symbolic_reference_mentioned_p (rtx op
)
8604 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8607 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8608 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8614 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8615 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8619 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8626 /* Return true if it is appropriate to emit `ret' instructions in the
8627 body of a function. Do this only if the epilogue is simple, needing a
8628 couple of insns. Prior to reloading, we can't tell how many registers
8629 must be saved, so return false then. Return false if there is no frame
8630 marker to de-allocate. */
8633 ix86_can_use_return_insn_p (void)
8635 struct ix86_frame frame
;
8637 if (! reload_completed
|| frame_pointer_needed
)
8640 /* Don't allow more than 32k pop, since that's all we can do
8641 with one instruction. */
8642 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8645 ix86_compute_frame_layout (&frame
);
8646 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8647 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8650 /* Value should be nonzero if functions must have frame pointers.
8651 Zero means the frame pointer need not be set up (and parms may
8652 be accessed via the stack pointer) in functions that seem suitable. */
8655 ix86_frame_pointer_required (void)
8657 /* If we accessed previous frames, then the generated code expects
8658 to be able to access the saved ebp value in our frame. */
8659 if (cfun
->machine
->accesses_prev_frame
)
8662 /* Several x86 os'es need a frame pointer for other reasons,
8663 usually pertaining to setjmp. */
8664 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8667 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8668 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8671 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8672 allocation is 4GB. */
8673 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8676 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8677 turns off the frame pointer by default. Turn it back on now if
8678 we've not got a leaf function. */
8679 if (TARGET_OMIT_LEAF_FRAME_POINTER
8681 || ix86_current_function_calls_tls_descriptor
))
8684 if (crtl
->profile
&& !flag_fentry
)
8690 /* Record that the current function accesses previous call frames. */
8693 ix86_setup_frame_addresses (void)
8695 cfun
->machine
->accesses_prev_frame
= 1;
8698 #ifndef USE_HIDDEN_LINKONCE
8699 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8700 # define USE_HIDDEN_LINKONCE 1
8702 # define USE_HIDDEN_LINKONCE 0
8706 static int pic_labels_used
;
8708 /* Fills in the label name that should be used for a pc thunk for
8709 the given register. */
8712 get_pc_thunk_name (char name
[32], unsigned int regno
)
8714 gcc_assert (!TARGET_64BIT
);
8716 if (USE_HIDDEN_LINKONCE
)
8717 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8719 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8723 /* This function generates code for -fpic that loads %ebx with
8724 the return address of the caller and then returns. */
/* NOTE(review): damaged extraction -- the function header's return type,
   local declarations, braces, and several preprocessor lines (the
   TARGET_MACHO conditionals are evident from the darwin_sections uses
   below) were dropped.  Fragments kept byte-for-byte; annotations only.  */
8727 ix86_code_end (void)
/* Emit one pc-load thunk per integer register whose bit was set in
   pic_labels_used by output_set_got.  */
8732 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8737 if (!(pic_labels_used
& (1 << regno
)))
8740 get_pc_thunk_name (name
, regno
);
/* Build a minimal public/static FUNCTION_DECL so the thunk is emitted
   through the normal function-output machinery.  */
8742 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8743 get_identifier (name
),
8744 build_function_type_list (void_type_node
, NULL_TREE
));
8745 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8746 NULL_TREE
, void_type_node
);
8747 TREE_PUBLIC (decl
) = 1;
8748 TREE_STATIC (decl
) = 1;
8749 DECL_IGNORED_P (decl
) = 1;
/* Darwin: weak coalesced section plus explicit weak/private markers.  */
8754 switch_to_section (darwin_sections
[text_coal_section
]);
8755 fputs ("\t.weak_definition\t", asm_out_file
);
8756 assemble_name (asm_out_file
, name
);
8757 fputs ("\n\t.private_extern\t", asm_out_file
);
8758 assemble_name (asm_out_file
, name
);
8759 putc ('\n', asm_out_file
);
8760 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8761 DECL_WEAK (decl
) = 1;
/* ELF: hidden link-once section when the assembler supports it.  */
8765 if (USE_HIDDEN_LINKONCE
)
8767 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8769 targetm
.asm_out
.unique_section (decl
, 0);
8770 switch_to_section (get_named_section (decl
, NULL
, 0));
8772 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8773 fputs ("\t.hidden\t", asm_out_file
);
8774 assemble_name (asm_out_file
, name
);
8775 putc ('\n', asm_out_file
);
8776 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8780 switch_to_section (text_section
);
8781 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8784 DECL_INITIAL (decl
) = make_node (BLOCK
);
8785 current_function_decl
= decl
;
8786 init_function_start (decl
);
8787 first_function_block_is_cold
= false;
8788 /* Make sure unwind info is emitted for the thunk if needed. */
8789 final_start_function (emit_barrier (), asm_out_file
, 1);
8791 /* Pad stack IP move with 4 instructions (two NOPs count
8792 as one instruction). */
8793 if (TARGET_PAD_SHORT_FUNCTION
)
8798 fputs ("\tnop\n", asm_out_file
);
/* Thunk body: load the return address ([esp]) into the register
   and return.  */
8801 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8802 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8803 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8804 fputs ("\tret\n", asm_out_file
);
8805 final_end_function ();
8806 init_insn_lengths ();
8807 free_after_compilation (cfun
);
8809 current_function_decl
= NULL
;
8812 if (flag_split_stack
)
8813 file_end_indicate_split_stack ();
8816 /* Emit code for the SET_GOT patterns. */
/* NOTE(review): damaged extraction -- return type, declarations, braces
   and the TARGET_MACHO / !TARGET_MACHO preprocessor structure around the
   Mach-O label output were dropped.  Fragments kept byte-for-byte.  */
8819 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
/* VxWorks RTP: the GOT base is read indirectly through two loads.  */
8825 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8827 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8828 xops
[2] = gen_rtx_MEM (Pmode
,
8829 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8830 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8832 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8833 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8834 an unadorned address. */
8835 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8836 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8837 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8841 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
/* Non-deep-branch path: emit the pc load inline via a local label.  */
8845 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8847 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8850 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8851 is what will be referenced by the Mach-O PIC subsystem. */
8853 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8856 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8857 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
/* Thunk path: call the per-register get_pc thunk and record that it
   must be emitted by ix86_code_end.  */
8862 get_pc_thunk_name (name
, REGNO (dest
));
8863 pic_labels_used
|= 1 << REGNO (dest
);
8865 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8866 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8867 output_asm_insn ("call\t%X2", xops
);
8868 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8869 is what will be referenced by the Mach-O PIC subsystem. */
8872 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8874 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8875 CODE_LABEL_NUMBER (label
));
/* Finally add the GOT symbol offset into the destination register.  */
8880 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8885 /* Generate an "push" pattern for input ARG. */
8890 struct machine_function
*m
= cfun
->machine
;
8892 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8893 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8894 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8896 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8897 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8899 return gen_rtx_SET (VOIDmode
,
8900 gen_rtx_MEM (word_mode
,
8901 gen_rtx_PRE_DEC (Pmode
,
8902 stack_pointer_rtx
)),
8906 /* Generate an "pop" pattern for input ARG. */
8911 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8912 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8914 return gen_rtx_SET (VOIDmode
,
8916 gen_rtx_MEM (word_mode
,
8917 gen_rtx_POST_INC (Pmode
,
8918 stack_pointer_rtx
)));
8921 /* Return >= 0 if there is an unused call-clobbered register available
8922 for the entire function. */
8925 ix86_select_alt_pic_regnum (void)
8929 && !ix86_current_function_calls_tls_descriptor
)
8932 /* Can't use the same register for both PIC and DRAP. */
8934 drap
= REGNO (crtl
->drap_reg
);
8937 for (i
= 2; i
>= 0; --i
)
8938 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8942 return INVALID_REGNUM
;
8945 /* Return TRUE if we need to save REGNO. */
8948 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8950 if (pic_offset_table_rtx
8951 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8952 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8954 || crtl
->calls_eh_return
8955 || crtl
->uses_const_pool
))
8956 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8958 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8963 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8964 if (test
== INVALID_REGNUM
)
8971 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8974 return (df_regs_ever_live_p (regno
)
8975 && !call_used_regs
[regno
]
8976 && !fixed_regs
[regno
]
8977 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8980 /* Return number of saved general prupose registers. */
8983 ix86_nsaved_regs (void)
8988 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8989 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8994 /* Return number of saved SSE registrers. */
8997 ix86_nsaved_sseregs (void)
9002 if (!TARGET_64BIT_MS_ABI
)
9004 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9005 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9010 /* Given FROM and TO register numbers, say whether this elimination is
9011 allowed. If stack alignment is needed, we can only replace argument
9012 pointer with hard frame pointer, or replace frame pointer with stack
9013 pointer. Otherwise, frame pointer elimination is automatically
9014 handled and all other eliminations are valid. */
9017 ix86_can_eliminate (const int from
, const int to
)
9019 if (stack_realign_fp
)
9020 return ((from
== ARG_POINTER_REGNUM
9021 && to
== HARD_FRAME_POINTER_REGNUM
)
9022 || (from
== FRAME_POINTER_REGNUM
9023 && to
== STACK_POINTER_REGNUM
));
9025 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9028 /* Return the offset between two registers, one to be eliminated, and the other
9029 its replacement, at the start of a routine. */
9032 ix86_initial_elimination_offset (int from
, int to
)
9034 struct ix86_frame frame
;
9035 ix86_compute_frame_layout (&frame
);
9037 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9038 return frame
.hard_frame_pointer_offset
;
9039 else if (from
== FRAME_POINTER_REGNUM
9040 && to
== HARD_FRAME_POINTER_REGNUM
)
9041 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9044 gcc_assert (to
== STACK_POINTER_REGNUM
);
9046 if (from
== ARG_POINTER_REGNUM
)
9047 return frame
.stack_pointer_offset
;
9049 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9050 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9054 /* In a dynamically-aligned function, we can't know the offset from
9055 stack pointer to frame pointer, so we must ensure that setjmp
9056 eliminates fp against the hard fp (%ebp) rather than trying to
9057 index from %esp up to the top of the frame across a gap that is
9058 of unknown (at compile-time) size. */
9060 ix86_builtin_setjmp_frame_value (void)
9062 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
9071 /* Fill structure ix86_frame about frame of currently computed function. */
9074 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9076 unsigned HOST_WIDE_INT stack_alignment_needed
;
9077 HOST_WIDE_INT offset
;
9078 unsigned HOST_WIDE_INT preferred_alignment
;
9079 HOST_WIDE_INT size
= get_frame_size ();
9080 HOST_WIDE_INT to_allocate
;
9082 frame
->nregs
= ix86_nsaved_regs ();
9083 frame
->nsseregs
= ix86_nsaved_sseregs ();
9085 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9086 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9088 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9089 function prologues and leaf. */
9090 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9091 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9092 || ix86_current_function_calls_tls_descriptor
))
9094 preferred_alignment
= 16;
9095 stack_alignment_needed
= 16;
9096 crtl
->preferred_stack_boundary
= 128;
9097 crtl
->stack_alignment_needed
= 128;
9100 gcc_assert (!size
|| stack_alignment_needed
);
9101 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9102 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9104 /* For SEH we have to limit the amount of code movement into the prologue.
9105 At present we do this via a BLOCKAGE, at which point there's very little
9106 scheduling that can be done, which means that there's very little point
9107 in doing anything except PUSHs. */
9109 cfun
->machine
->use_fast_prologue_epilogue
= false;
9111 /* During reload iteration the amount of registers saved can change.
9112 Recompute the value as needed. Do not recompute when amount of registers
9113 didn't change as reload does multiple calls to the function and does not
9114 expect the decision to change within single iteration. */
9115 else if (!optimize_function_for_size_p (cfun
)
9116 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9118 int count
= frame
->nregs
;
9119 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9121 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9123 /* The fast prologue uses move instead of push to save registers. This
9124 is significantly longer, but also executes faster as modern hardware
9125 can execute the moves in parallel, but can't do that for push/pop.
9127 Be careful about choosing what prologue to emit: When function takes
9128 many instructions to execute we may use slow version as well as in
9129 case function is known to be outside hot spot (this is known with
9130 feedback only). Weight the size of function by number of registers
9131 to save as it is cheap to use one or two push instructions but very
9132 slow to use many of them. */
9134 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9135 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9136 || (flag_branch_probabilities
9137 && node
->frequency
< NODE_FREQUENCY_HOT
))
9138 cfun
->machine
->use_fast_prologue_epilogue
= false;
9140 cfun
->machine
->use_fast_prologue_epilogue
9141 = !expensive_function_p (count
);
9144 frame
->save_regs_using_mov
9145 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9146 /* If static stack checking is enabled and done with probes,
9147 the registers need to be saved before allocating the frame. */
9148 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9150 /* Skip return address. */
9151 offset
= UNITS_PER_WORD
;
9153 /* Skip pushed static chain. */
9154 if (ix86_static_chain_on_stack
)
9155 offset
+= UNITS_PER_WORD
;
9157 /* Skip saved base pointer. */
9158 if (frame_pointer_needed
)
9159 offset
+= UNITS_PER_WORD
;
9160 frame
->hfp_save_offset
= offset
;
9162 /* The traditional frame pointer location is at the top of the frame. */
9163 frame
->hard_frame_pointer_offset
= offset
;
9165 /* Register save area */
9166 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9167 frame
->reg_save_offset
= offset
;
9169 /* On SEH target, registers are pushed just before the frame pointer
9172 frame
->hard_frame_pointer_offset
= offset
;
9174 /* Align and set SSE register save area. */
9175 if (frame
->nsseregs
)
9177 /* The only ABI that has saved SSE registers (Win64) also has a
9178 16-byte aligned default stack, and thus we don't need to be
9179 within the re-aligned local stack frame to save them. */
9180 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9181 offset
= (offset
+ 16 - 1) & -16;
9182 offset
+= frame
->nsseregs
* 16;
9184 frame
->sse_reg_save_offset
= offset
;
9186 /* The re-aligned stack starts here. Values before this point are not
9187 directly comparable with values below this point. In order to make
9188 sure that no value happens to be the same before and after, force
9189 the alignment computation below to add a non-zero value. */
9190 if (stack_realign_fp
)
9191 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9194 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9195 offset
+= frame
->va_arg_size
;
9197 /* Align start of frame for local function. */
9198 if (stack_realign_fp
9199 || offset
!= frame
->sse_reg_save_offset
9202 || cfun
->calls_alloca
9203 || ix86_current_function_calls_tls_descriptor
)
9204 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9206 /* Frame pointer points here. */
9207 frame
->frame_pointer_offset
= offset
;
9211 /* Add outgoing arguments area. Can be skipped if we eliminated
9212 all the function calls as dead code.
9213 Skipping is however impossible when function calls alloca. Alloca
9214 expander assumes that last crtl->outgoing_args_size
9215 of stack frame are unused. */
9216 if (ACCUMULATE_OUTGOING_ARGS
9217 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9218 || ix86_current_function_calls_tls_descriptor
))
9220 offset
+= crtl
->outgoing_args_size
;
9221 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9224 frame
->outgoing_arguments_size
= 0;
9226 /* Align stack boundary. Only needed if we're calling another function
9228 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9229 || ix86_current_function_calls_tls_descriptor
)
9230 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9232 /* We've reached end of stack frame. */
9233 frame
->stack_pointer_offset
= offset
;
9235 /* Size prologue needs to allocate. */
9236 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9238 if ((!to_allocate
&& frame
->nregs
<= 1)
9239 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9240 frame
->save_regs_using_mov
= false;
9242 if (ix86_using_red_zone ()
9243 && crtl
->sp_is_unchanging
9245 && !ix86_current_function_calls_tls_descriptor
)
9247 frame
->red_zone_size
= to_allocate
;
9248 if (frame
->save_regs_using_mov
)
9249 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9250 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9251 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9254 frame
->red_zone_size
= 0;
9255 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9257 /* The SEH frame pointer location is near the bottom of the frame.
9258 This is enforced by the fact that the difference between the
9259 stack pointer and the frame pointer is limited to 240 bytes in
9260 the unwind data structure. */
9265 /* If we can leave the frame pointer where it is, do so. Also, returns
9266 the establisher frame for __builtin_frame_address (0). */
9267 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9268 if (diff
<= SEH_MAX_FRAME_SIZE
9269 && (diff
> 240 || (diff
& 15) != 0)
9270 && !crtl
->accesses_prior_frames
)
9272 /* Ideally we'd determine what portion of the local stack frame
9273 (within the constraint of the lowest 240) is most heavily used.
9274 But without that complication, simply bias the frame pointer
9275 by 128 bytes so as to maximize the amount of the local stack
9276 frame that is addressable with 8-bit offsets. */
9277 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9282 /* This is semi-inlined memory_address_length, but simplified
9283 since we know that we're always dealing with reg+offset, and
9284 to avoid having to create and discard all that rtl. */
9287 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9293 /* EBP and R13 cannot be encoded without an offset. */
9294 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9296 else if (IN_RANGE (offset
, -128, 127))
9299 /* ESP and R12 must be encoded with a SIB byte. */
9300 if (regno
== SP_REG
|| regno
== R12_REG
)
9306 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9307 The valid base registers are taken from CFUN->MACHINE->FS. */
9310 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9312 const struct machine_function
*m
= cfun
->machine
;
9313 rtx base_reg
= NULL
;
9314 HOST_WIDE_INT base_offset
= 0;
9316 if (m
->use_fast_prologue_epilogue
)
9318 /* Choose the base register most likely to allow the most scheduling
9319 opportunities. Generally FP is valid throughout the function,
9320 while DRAP must be reloaded within the epilogue. But choose either
9321 over the SP due to increased encoding size. */
9325 base_reg
= hard_frame_pointer_rtx
;
9326 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9328 else if (m
->fs
.drap_valid
)
9330 base_reg
= crtl
->drap_reg
;
9331 base_offset
= 0 - cfa_offset
;
9333 else if (m
->fs
.sp_valid
)
9335 base_reg
= stack_pointer_rtx
;
9336 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9341 HOST_WIDE_INT toffset
;
9344 /* Choose the base register with the smallest address encoding.
9345 With a tie, choose FP > DRAP > SP. */
9348 base_reg
= stack_pointer_rtx
;
9349 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9350 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9352 if (m
->fs
.drap_valid
)
9354 toffset
= 0 - cfa_offset
;
9355 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9358 base_reg
= crtl
->drap_reg
;
9359 base_offset
= toffset
;
9365 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9366 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9369 base_reg
= hard_frame_pointer_rtx
;
9370 base_offset
= toffset
;
9375 gcc_assert (base_reg
!= NULL
);
9377 return plus_constant (Pmode
, base_reg
, base_offset
);
9380 /* Emit code to save registers in the prologue. */
9383 ix86_emit_save_regs (void)
9388 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9389 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9391 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9392 RTX_FRAME_RELATED_P (insn
) = 1;
9396 /* Emit a single register save at CFA - CFA_OFFSET. */
9399 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9400 HOST_WIDE_INT cfa_offset
)
9402 struct machine_function
*m
= cfun
->machine
;
9403 rtx reg
= gen_rtx_REG (mode
, regno
);
9404 rtx mem
, addr
, base
, insn
;
9406 addr
= choose_baseaddr (cfa_offset
);
9407 mem
= gen_frame_mem (mode
, addr
);
9409 /* For SSE saves, we need to indicate the 128-bit alignment. */
9410 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9412 insn
= emit_move_insn (mem
, reg
);
9413 RTX_FRAME_RELATED_P (insn
) = 1;
9416 if (GET_CODE (base
) == PLUS
)
9417 base
= XEXP (base
, 0);
9418 gcc_checking_assert (REG_P (base
));
9420 /* When saving registers into a re-aligned local stack frame, avoid
9421 any tricky guessing by dwarf2out. */
9422 if (m
->fs
.realigned
)
9424 gcc_checking_assert (stack_realign_drap
);
9426 if (regno
== REGNO (crtl
->drap_reg
))
9428 /* A bit of a hack. We force the DRAP register to be saved in
9429 the re-aligned stack frame, which provides us with a copy
9430 of the CFA that will last past the prologue. Install it. */
9431 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9432 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9433 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9434 mem
= gen_rtx_MEM (mode
, addr
);
9435 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9439 /* The frame pointer is a stable reference within the
9440 aligned frame. Use it. */
9441 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9442 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9443 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9444 mem
= gen_rtx_MEM (mode
, addr
);
9445 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9446 gen_rtx_SET (VOIDmode
, mem
, reg
));
9450 /* The memory may not be relative to the current CFA register,
9451 which means that we may need to generate a new pattern for
9452 use by the unwind info. */
9453 else if (base
!= m
->fs
.cfa_reg
)
9455 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9456 m
->fs
.cfa_offset
- cfa_offset
);
9457 mem
= gen_rtx_MEM (mode
, addr
);
9458 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9462 /* Emit code to save registers using MOV insns.
9463 First register is stored at CFA - CFA_OFFSET. */
9465 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9469 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9470 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9472 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9473 cfa_offset
-= UNITS_PER_WORD
;
9477 /* Emit code to save SSE registers using MOV insns.
9478 First register is stored at CFA - CFA_OFFSET. */
9480 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9484 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9485 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9487 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9492 static GTY(()) rtx queued_cfa_restores
;
9494 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9495 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9496 Don't add the note if the previously saved value will be left untouched
9497 within stack red-zone till return, as unwinders can find the same value
9498 in the register and on the stack. */
9501 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9503 if (!crtl
->shrink_wrapped
9504 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9509 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9510 RTX_FRAME_RELATED_P (insn
) = 1;
9514 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9517 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9520 ix86_add_queued_cfa_restore_notes (rtx insn
)
9523 if (!queued_cfa_restores
)
9525 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9527 XEXP (last
, 1) = REG_NOTES (insn
);
9528 REG_NOTES (insn
) = queued_cfa_restores
;
9529 queued_cfa_restores
= NULL_RTX
;
9530 RTX_FRAME_RELATED_P (insn
) = 1;
9533 /* Expand prologue or epilogue stack adjustment.
9534 The pattern exist to put a dependency on all ebp-based memory accesses.
9535 STYLE should be negative if instructions should be marked as frame related,
9536 zero if %r11 register is live and cannot be freely used and positive
9540 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9541 int style
, bool set_cfa
)
9543 struct machine_function
*m
= cfun
->machine
;
9545 bool add_frame_related_expr
= false;
9547 if (Pmode
== SImode
)
9548 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9549 else if (x86_64_immediate_operand (offset
, DImode
))
9550 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9554 /* r11 is used by indirect sibcall return as well, set before the
9555 epilogue and used after the epilogue. */
9557 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9560 gcc_assert (src
!= hard_frame_pointer_rtx
9561 && dest
!= hard_frame_pointer_rtx
);
9562 tmp
= hard_frame_pointer_rtx
;
9564 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9566 add_frame_related_expr
= true;
9568 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9571 insn
= emit_insn (insn
);
9573 ix86_add_queued_cfa_restore_notes (insn
);
9579 gcc_assert (m
->fs
.cfa_reg
== src
);
9580 m
->fs
.cfa_offset
+= INTVAL (offset
);
9581 m
->fs
.cfa_reg
= dest
;
9583 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9584 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9585 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9586 RTX_FRAME_RELATED_P (insn
) = 1;
9590 RTX_FRAME_RELATED_P (insn
) = 1;
9591 if (add_frame_related_expr
)
9593 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9594 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9595 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9599 if (dest
== stack_pointer_rtx
)
9601 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9602 bool valid
= m
->fs
.sp_valid
;
9604 if (src
== hard_frame_pointer_rtx
)
9606 valid
= m
->fs
.fp_valid
;
9607 ooffset
= m
->fs
.fp_offset
;
9609 else if (src
== crtl
->drap_reg
)
9611 valid
= m
->fs
.drap_valid
;
9616 /* Else there are two possibilities: SP itself, which we set
9617 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9618 taken care of this by hand along the eh_return path. */
9619 gcc_checking_assert (src
== stack_pointer_rtx
9620 || offset
== const0_rtx
);
9623 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9624 m
->fs
.sp_valid
= valid
;
9628 /* Find an available register to be used as dynamic realign argument
9629 pointer regsiter. Such a register will be written in prologue and
9630 used in begin of body, so it must not be
9631 1. parameter passing register.
9633 We reuse static-chain register if it is available. Otherwise, we
9634 use DI for i386 and R13 for x86-64. We chose R13 since it has
9637 Return: the regno of chosen register. */
9640 find_drap_reg (void)
9642 tree decl
= cfun
->decl
;
9646 /* Use R13 for nested function or function need static chain.
9647 Since function with tail call may use any caller-saved
9648 registers in epilogue, DRAP must not use caller-saved
9649 register in such case. */
9650 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9657 /* Use DI for nested function or function need static chain.
9658 Since function with tail call may use any caller-saved
9659 registers in epilogue, DRAP must not use caller-saved
9660 register in such case. */
9661 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9664 /* Reuse static chain register if it isn't used for parameter
9666 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9668 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9669 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9676 /* Return minimum incoming stack alignment. */
9679 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9681 unsigned int incoming_stack_boundary
;
9683 /* Prefer the one specified at command line. */
9684 if (ix86_user_incoming_stack_boundary
)
9685 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9686 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9687 if -mstackrealign is used, it isn't used for sibcall check and
9688 estimated stack alignment is 128bit. */
9691 && ix86_force_align_arg_pointer
9692 && crtl
->stack_alignment_estimated
== 128)
9693 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9695 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9697 /* Incoming stack alignment can be changed on individual functions
9698 via force_align_arg_pointer attribute. We use the smallest
9699 incoming stack boundary. */
9700 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9701 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9702 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9703 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9705 /* The incoming stack frame has to be aligned at least at
9706 parm_stack_boundary. */
9707 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9708 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9710 /* Stack at entrance of main is aligned by runtime. We use the
9711 smallest incoming stack boundary. */
9712 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9713 && DECL_NAME (current_function_decl
)
9714 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9715 && DECL_FILE_SCOPE_P (current_function_decl
))
9716 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9718 return incoming_stack_boundary
;
9721 /* Update incoming stack boundary and estimated stack alignment. */
9724 ix86_update_stack_boundary (void)
9726 ix86_incoming_stack_boundary
9727 = ix86_minimum_incoming_stack_boundary (false);
9729 /* x86_64 vararg needs 16byte stack alignment for register save
9733 && crtl
->stack_alignment_estimated
< 128)
9734 crtl
->stack_alignment_estimated
= 128;
9737 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9738 needed or an rtx for DRAP otherwise. */
9741 ix86_get_drap_rtx (void)
9743 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9744 crtl
->need_drap
= true;
9746 if (stack_realign_drap
)
9748 /* Assign DRAP to vDRAP and returns vDRAP */
9749 unsigned int regno
= find_drap_reg ();
9754 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9755 crtl
->drap_reg
= arg_ptr
;
9758 drap_vreg
= copy_to_reg (arg_ptr
);
9762 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9765 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9766 RTX_FRAME_RELATED_P (insn
) = 1;
9774 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9777 ix86_internal_arg_pointer (void)
9779 return virtual_incoming_args_rtx
;
9782 struct scratch_reg
{
9787 /* Return a short-lived scratch register for use on function entry.
9788 In 32-bit mode, it is valid only after the registers are saved
9789 in the prologue. This register must be released by means of
9790 release_scratch_register_on_entry once it is dead. */
9793 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9801 /* We always use R11 in 64-bit mode. */
9806 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9808 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9809 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9810 int regparm
= ix86_function_regparm (fntype
, decl
);
9812 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9814 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9815 for the static chain register. */
9816 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9817 && drap_regno
!= AX_REG
)
9819 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9821 /* ecx is the static chain register. */
9822 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9823 && drap_regno
!= CX_REG
)
9825 else if (ix86_save_reg (BX_REG
, true))
9827 /* esi is the static chain register. */
9828 else if (!(regparm
== 3 && static_chain_p
)
9829 && ix86_save_reg (SI_REG
, true))
9831 else if (ix86_save_reg (DI_REG
, true))
9835 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9840 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9843 rtx insn
= emit_insn (gen_push (sr
->reg
));
9844 RTX_FRAME_RELATED_P (insn
) = 1;
9848 /* Release a scratch register obtained from the preceding function. */
9851 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9855 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9857 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9858 RTX_FRAME_RELATED_P (insn
) = 1;
9859 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9860 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9861 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Distance in bytes between successive stack probes.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9867 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9870 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9872 /* We skip the probe for the first interval + a small dope of 4 words and
9873 probe that many bytes past the specified size to maintain a protection
9874 area at the botton of the stack. */
9875 const int dope
= 4 * UNITS_PER_WORD
;
9876 rtx size_rtx
= GEN_INT (size
), last
;
9878 /* See if we have a constant small number of probes to generate. If so,
9879 that's the easy case. The run-time loop is made up of 11 insns in the
9880 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9881 for n # of intervals. */
9882 if (size
<= 5 * PROBE_INTERVAL
)
9884 HOST_WIDE_INT i
, adjust
;
9885 bool first_probe
= true;
9887 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9888 values of N from 1 until it exceeds SIZE. If only one probe is
9889 needed, this will not generate any code. Then adjust and probe
9890 to PROBE_INTERVAL + SIZE. */
9891 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9895 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9896 first_probe
= false;
9899 adjust
= PROBE_INTERVAL
;
9901 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9902 plus_constant (Pmode
, stack_pointer_rtx
,
9904 emit_stack_probe (stack_pointer_rtx
);
9908 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9910 adjust
= size
+ PROBE_INTERVAL
- i
;
9912 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9913 plus_constant (Pmode
, stack_pointer_rtx
,
9915 emit_stack_probe (stack_pointer_rtx
);
9917 /* Adjust back to account for the additional first interval. */
9918 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9919 plus_constant (Pmode
, stack_pointer_rtx
,
9920 PROBE_INTERVAL
+ dope
)));
9923 /* Otherwise, do the same as above, but in a loop. Note that we must be
9924 extra careful with variables wrapping around because we might be at
9925 the very top (or the very bottom) of the address space and we have
9926 to be able to handle this case properly; in particular, we use an
9927 equality test for the loop condition. */
9930 HOST_WIDE_INT rounded_size
;
9931 struct scratch_reg sr
;
9933 get_scratch_register_on_entry (&sr
);
9936 /* Step 1: round SIZE to the previous multiple of the interval. */
9938 rounded_size
= size
& -PROBE_INTERVAL
;
9941 /* Step 2: compute initial and final value of the loop counter. */
9943 /* SP = SP_0 + PROBE_INTERVAL. */
9944 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9945 plus_constant (Pmode
, stack_pointer_rtx
,
9946 - (PROBE_INTERVAL
+ dope
))));
9948 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9949 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9950 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9951 gen_rtx_PLUS (Pmode
, sr
.reg
,
9952 stack_pointer_rtx
)));
9957 while (SP != LAST_ADDR)
9959 SP = SP + PROBE_INTERVAL
9963 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9964 values of N from 1 until it is equal to ROUNDED_SIZE. */
9966 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9969 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9970 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9972 if (size
!= rounded_size
)
9974 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9975 plus_constant (Pmode
, stack_pointer_rtx
,
9976 rounded_size
- size
)));
9977 emit_stack_probe (stack_pointer_rtx
);
9980 /* Adjust back to account for the additional first interval. */
9981 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9982 plus_constant (Pmode
, stack_pointer_rtx
,
9983 PROBE_INTERVAL
+ dope
)));
9985 release_scratch_register_on_entry (&sr
);
9988 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9990 /* Even if the stack pointer isn't the CFA register, we need to correctly
9991 describe the adjustments made to it, in particular differentiate the
9992 frame-related ones from the frame-unrelated ones. */
9995 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9996 XVECEXP (expr
, 0, 0)
9997 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9998 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9999 XVECEXP (expr
, 0, 1)
10000 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10001 plus_constant (Pmode
, stack_pointer_rtx
,
10002 PROBE_INTERVAL
+ dope
+ size
));
10003 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10004 RTX_FRAME_RELATED_P (last
) = 1;
10006 cfun
->machine
->fs
.sp_offset
+= size
;
10009 /* Make sure nothing is scheduled before we are done. */
10010 emit_insn (gen_blockage ());
10013 /* Adjust the stack pointer up to REG while probing it. */
10016 output_adjust_stack_and_probe (rtx reg
)
10018 static int labelno
= 0;
10019 char loop_lab
[32], end_lab
[32];
10022 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10023 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10025 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10027 /* Jump to END_LAB if SP == LAST_ADDR. */
10028 xops
[0] = stack_pointer_rtx
;
10030 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10031 fputs ("\tje\t", asm_out_file
);
10032 assemble_name_raw (asm_out_file
, end_lab
);
10033 fputc ('\n', asm_out_file
);
10035 /* SP = SP + PROBE_INTERVAL. */
10036 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10037 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10040 xops
[1] = const0_rtx
;
10041 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10043 fprintf (asm_out_file
, "\tjmp\t");
10044 assemble_name_raw (asm_out_file
, loop_lab
);
10045 fputc ('\n', asm_out_file
);
10047 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10052 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10053 inclusive. These are offsets from the current stack pointer. */
10056 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10058 /* See if we have a constant small number of probes to generate. If so,
10059 that's the easy case. The run-time loop is made up of 7 insns in the
10060 generic case while the compile-time loop is made up of n insns for n #
10062 if (size
<= 7 * PROBE_INTERVAL
)
10066 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10067 it exceeds SIZE. If only one probe is needed, this will not
10068 generate any code. Then probe at FIRST + SIZE. */
10069 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10070 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10073 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10077 /* Otherwise, do the same as above, but in a loop. Note that we must be
10078 extra careful with variables wrapping around because we might be at
10079 the very top (or the very bottom) of the address space and we have
10080 to be able to handle this case properly; in particular, we use an
10081 equality test for the loop condition. */
10084 HOST_WIDE_INT rounded_size
, last
;
10085 struct scratch_reg sr
;
10087 get_scratch_register_on_entry (&sr
);
10090 /* Step 1: round SIZE to the previous multiple of the interval. */
10092 rounded_size
= size
& -PROBE_INTERVAL
;
10095 /* Step 2: compute initial and final value of the loop counter. */
10097 /* TEST_OFFSET = FIRST. */
10098 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10100 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10101 last
= first
+ rounded_size
;
10104 /* Step 3: the loop
10106 while (TEST_ADDR != LAST_ADDR)
10108 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10112 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10113 until it is equal to ROUNDED_SIZE. */
10115 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10118 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10119 that SIZE is equal to ROUNDED_SIZE. */
10121 if (size
!= rounded_size
)
10122 emit_stack_probe (plus_constant (Pmode
,
10123 gen_rtx_PLUS (Pmode
,
10126 rounded_size
- size
));
10128 release_scratch_register_on_entry (&sr
);
10131 /* Make sure nothing is scheduled before we are done. */
10132 emit_insn (gen_blockage ());
10135 /* Probe a range of stack addresses from REG to END, inclusive. These are
10136 offsets from the current stack pointer. */
10139 output_probe_stack_range (rtx reg
, rtx end
)
10141 static int labelno
= 0;
10142 char loop_lab
[32], end_lab
[32];
10145 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10146 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10148 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10150 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10153 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10154 fputs ("\tje\t", asm_out_file
);
10155 assemble_name_raw (asm_out_file
, end_lab
);
10156 fputc ('\n', asm_out_file
);
10158 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10159 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10160 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10162 /* Probe at TEST_ADDR. */
10163 xops
[0] = stack_pointer_rtx
;
10165 xops
[2] = const0_rtx
;
10166 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10168 fprintf (asm_out_file
, "\tjmp\t");
10169 assemble_name_raw (asm_out_file
, loop_lab
);
10170 fputc ('\n', asm_out_file
);
10172 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10177 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10178 to be generated in correct form. */
10180 ix86_finalize_stack_realign_flags (void)
10182 /* Check if stack realign is really needed after reload, and
10183 stores result in cfun */
10184 unsigned int incoming_stack_boundary
10185 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10186 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10187 unsigned int stack_realign
= (incoming_stack_boundary
10189 ? crtl
->max_used_stack_slot_alignment
10190 : crtl
->stack_alignment_needed
));
10192 if (crtl
->stack_realign_finalized
)
10194 /* After stack_realign_needed is finalized, we can't no longer
10196 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10200 /* If the only reason for frame_pointer_needed is that we conservatively
10201 assumed stack realignment might be needed, but in the end nothing that
10202 needed the stack alignment had been spilled, clear frame_pointer_needed
10203 and say we don't need stack realignment. */
10205 && !crtl
->need_drap
10206 && frame_pointer_needed
10208 && flag_omit_frame_pointer
10209 && crtl
->sp_is_unchanging
10210 && !ix86_current_function_calls_tls_descriptor
10211 && !crtl
->accesses_prior_frames
10212 && !cfun
->calls_alloca
10213 && !crtl
->calls_eh_return
10214 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10215 && !ix86_frame_pointer_required ()
10216 && get_frame_size () == 0
10217 && ix86_nsaved_sseregs () == 0
10218 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10220 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10223 CLEAR_HARD_REG_SET (prologue_used
);
10224 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10225 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10226 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10227 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10228 HARD_FRAME_POINTER_REGNUM
);
10232 FOR_BB_INSNS (bb
, insn
)
10233 if (NONDEBUG_INSN_P (insn
)
10234 && requires_stack_frame_p (insn
, prologue_used
,
10235 set_up_by_prologue
))
10237 crtl
->stack_realign_needed
= stack_realign
;
10238 crtl
->stack_realign_finalized
= true;
10243 frame_pointer_needed
= false;
10244 stack_realign
= false;
10245 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10246 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10247 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10248 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10249 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10250 df_finish_pass (true);
10251 df_scan_alloc (NULL
);
10253 df_compute_regs_ever_live (true);
10257 crtl
->stack_realign_needed
= stack_realign
;
10258 crtl
->stack_realign_finalized
= true;
10261 /* Expand the prologue into a bunch of separate insns. */
10264 ix86_expand_prologue (void)
10266 struct machine_function
*m
= cfun
->machine
;
10269 struct ix86_frame frame
;
10270 HOST_WIDE_INT allocate
;
10271 bool int_registers_saved
;
10272 bool sse_registers_saved
;
10274 ix86_finalize_stack_realign_flags ();
10276 /* DRAP should not coexist with stack_realign_fp */
10277 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10279 memset (&m
->fs
, 0, sizeof (m
->fs
));
10281 /* Initialize CFA state for before the prologue. */
10282 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10283 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10285 /* Track SP offset to the CFA. We continue tracking this after we've
10286 swapped the CFA register away from SP. In the case of re-alignment
10287 this is fudged; we're interested to offsets within the local frame. */
10288 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10289 m
->fs
.sp_valid
= true;
10291 ix86_compute_frame_layout (&frame
);
10293 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10295 /* We should have already generated an error for any use of
10296 ms_hook on a nested function. */
10297 gcc_checking_assert (!ix86_static_chain_on_stack
);
10299 /* Check if profiling is active and we shall use profiling before
10300 prologue variant. If so sorry. */
10301 if (crtl
->profile
&& flag_fentry
!= 0)
10302 sorry ("ms_hook_prologue attribute isn%'t compatible "
10303 "with -mfentry for 32-bit");
10305 /* In ix86_asm_output_function_label we emitted:
10306 8b ff movl.s %edi,%edi
10308 8b ec movl.s %esp,%ebp
10310 This matches the hookable function prologue in Win32 API
10311 functions in Microsoft Windows XP Service Pack 2 and newer.
10312 Wine uses this to enable Windows apps to hook the Win32 API
10313 functions provided by Wine.
10315 What that means is that we've already set up the frame pointer. */
10317 if (frame_pointer_needed
10318 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10322 /* We've decided to use the frame pointer already set up.
10323 Describe this to the unwinder by pretending that both
10324 push and mov insns happen right here.
10326 Putting the unwind info here at the end of the ms_hook
10327 is done so that we can make absolutely certain we get
10328 the required byte sequence at the start of the function,
10329 rather than relying on an assembler that can produce
10330 the exact encoding required.
10332 However it does mean (in the unpatched case) that we have
10333 a 1 insn window where the asynchronous unwind info is
10334 incorrect. However, if we placed the unwind info at
10335 its correct location we would have incorrect unwind info
10336 in the patched case. Which is probably all moot since
10337 I don't expect Wine generates dwarf2 unwind info for the
10338 system libraries that use this feature. */
10340 insn
= emit_insn (gen_blockage ());
10342 push
= gen_push (hard_frame_pointer_rtx
);
10343 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10344 stack_pointer_rtx
);
10345 RTX_FRAME_RELATED_P (push
) = 1;
10346 RTX_FRAME_RELATED_P (mov
) = 1;
10348 RTX_FRAME_RELATED_P (insn
) = 1;
10349 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10350 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10352 /* Note that gen_push incremented m->fs.cfa_offset, even
10353 though we didn't emit the push insn here. */
10354 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10355 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10356 m
->fs
.fp_valid
= true;
10360 /* The frame pointer is not needed so pop %ebp again.
10361 This leaves us with a pristine state. */
10362 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10366 /* The first insn of a function that accepts its static chain on the
10367 stack is to push the register that would be filled in by a direct
10368 call. This insn will be skipped by the trampoline. */
10369 else if (ix86_static_chain_on_stack
)
10371 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10372 emit_insn (gen_blockage ());
10374 /* We don't want to interpret this push insn as a register save,
10375 only as a stack adjustment. The real copy of the register as
10376 a save will be done later, if needed. */
10377 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10378 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10379 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10380 RTX_FRAME_RELATED_P (insn
) = 1;
10383 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10384 of DRAP is needed and stack realignment is really needed after reload */
10385 if (stack_realign_drap
)
10387 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10389 /* Only need to push parameter pointer reg if it is caller saved. */
10390 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10392 /* Push arg pointer reg */
10393 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10394 RTX_FRAME_RELATED_P (insn
) = 1;
10397 /* Grab the argument pointer. */
10398 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10399 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10400 RTX_FRAME_RELATED_P (insn
) = 1;
10401 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10402 m
->fs
.cfa_offset
= 0;
10404 /* Align the stack. */
10405 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10407 GEN_INT (-align_bytes
)));
10408 RTX_FRAME_RELATED_P (insn
) = 1;
10410 /* Replicate the return address on the stack so that return
10411 address can be reached via (argp - 1) slot. This is needed
10412 to implement macro RETURN_ADDR_RTX and intrinsic function
10413 expand_builtin_return_addr etc. */
10414 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10415 t
= gen_frame_mem (word_mode
, t
);
10416 insn
= emit_insn (gen_push (t
));
10417 RTX_FRAME_RELATED_P (insn
) = 1;
10419 /* For the purposes of frame and register save area addressing,
10420 we've started over with a new frame. */
10421 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10422 m
->fs
.realigned
= true;
10425 int_registers_saved
= (frame
.nregs
== 0);
10426 sse_registers_saved
= (frame
.nsseregs
== 0);
10428 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10430 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10431 slower on all targets. Also sdb doesn't like it. */
10432 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10433 RTX_FRAME_RELATED_P (insn
) = 1;
10435 /* Push registers now, before setting the frame pointer
10437 if (!int_registers_saved
10439 && !frame
.save_regs_using_mov
)
10441 ix86_emit_save_regs ();
10442 int_registers_saved
= true;
10443 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10446 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10448 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10449 RTX_FRAME_RELATED_P (insn
) = 1;
10451 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10452 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10453 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10454 m
->fs
.fp_valid
= true;
10458 if (!int_registers_saved
)
10460 /* If saving registers via PUSH, do so now. */
10461 if (!frame
.save_regs_using_mov
)
10463 ix86_emit_save_regs ();
10464 int_registers_saved
= true;
10465 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10468 /* When using red zone we may start register saving before allocating
10469 the stack frame saving one cycle of the prologue. However, avoid
10470 doing this if we have to probe the stack; at least on x86_64 the
10471 stack probe can turn into a call that clobbers a red zone location. */
10472 else if (ix86_using_red_zone ()
10473 && (! TARGET_STACK_PROBE
10474 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10476 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10477 int_registers_saved
= true;
10481 if (stack_realign_fp
)
10483 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10484 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10486 /* The computation of the size of the re-aligned stack frame means
10487 that we must allocate the size of the register save area before
10488 performing the actual alignment. Otherwise we cannot guarantee
10489 that there's enough storage above the realignment point. */
10490 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10491 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10492 GEN_INT (m
->fs
.sp_offset
10493 - frame
.sse_reg_save_offset
),
10496 /* Align the stack. */
10497 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10499 GEN_INT (-align_bytes
)));
10501 /* For the purposes of register save area addressing, the stack
10502 pointer is no longer valid. As for the value of sp_offset,
10503 see ix86_compute_frame_layout, which we need to match in order
10504 to pass verification of stack_pointer_offset at the end. */
10505 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10506 m
->fs
.sp_valid
= false;
10509 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10511 if (flag_stack_usage_info
)
10513 /* We start to count from ARG_POINTER. */
10514 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10516 /* If it was realigned, take into account the fake frame. */
10517 if (stack_realign_drap
)
10519 if (ix86_static_chain_on_stack
)
10520 stack_size
+= UNITS_PER_WORD
;
10522 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10523 stack_size
+= UNITS_PER_WORD
;
10525 /* This over-estimates by 1 minimal-stack-alignment-unit but
10526 mitigates that by counting in the new return address slot. */
10527 current_function_dynamic_stack_size
10528 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10531 current_function_static_stack_size
= stack_size
;
10534 /* On SEH target with very large frame size, allocate an area to save
10535 SSE registers (as the very large allocation won't be described). */
10537 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10538 && !sse_registers_saved
)
10540 HOST_WIDE_INT sse_size
=
10541 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10543 gcc_assert (int_registers_saved
);
10545 /* No need to do stack checking as the area will be immediately
10547 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10548 GEN_INT (-sse_size
), -1,
10549 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10550 allocate
-= sse_size
;
10551 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10552 sse_registers_saved
= true;
10555 /* The stack has already been decremented by the instruction calling us
10556 so probe if the size is non-negative to preserve the protection area. */
10557 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10559 /* We expect the registers to be saved when probes are used. */
10560 gcc_assert (int_registers_saved
);
10562 if (STACK_CHECK_MOVING_SP
)
10564 ix86_adjust_stack_and_probe (allocate
);
10569 HOST_WIDE_INT size
= allocate
;
10571 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10572 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10574 if (TARGET_STACK_PROBE
)
10575 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10577 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10583 else if (!ix86_target_stack_probe ()
10584 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10586 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10587 GEN_INT (-allocate
), -1,
10588 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10592 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10594 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10596 bool eax_live
= false;
10597 bool r10_live
= false;
10600 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10601 if (!TARGET_64BIT_MS_ABI
)
10602 eax_live
= ix86_eax_live_at_start_p ();
10606 emit_insn (gen_push (eax
));
10607 allocate
-= UNITS_PER_WORD
;
10611 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10612 emit_insn (gen_push (r10
));
10613 allocate
-= UNITS_PER_WORD
;
10616 emit_move_insn (eax
, GEN_INT (allocate
));
10617 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10619 /* Use the fact that AX still contains ALLOCATE. */
10620 adjust_stack_insn
= (Pmode
== DImode
10621 ? gen_pro_epilogue_adjust_stack_di_sub
10622 : gen_pro_epilogue_adjust_stack_si_sub
);
10624 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10625 stack_pointer_rtx
, eax
));
10627 /* Note that SEH directives need to continue tracking the stack
10628 pointer even after the frame pointer has been set up. */
10629 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10631 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10632 m
->fs
.cfa_offset
+= allocate
;
10634 RTX_FRAME_RELATED_P (insn
) = 1;
10635 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10636 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10637 plus_constant (Pmode
, stack_pointer_rtx
,
10640 m
->fs
.sp_offset
+= allocate
;
10642 if (r10_live
&& eax_live
)
10644 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10645 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10646 gen_frame_mem (word_mode
, t
));
10647 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10648 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10649 gen_frame_mem (word_mode
, t
));
10651 else if (eax_live
|| r10_live
)
10653 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10654 emit_move_insn (gen_rtx_REG (word_mode
,
10655 (eax_live
? AX_REG
: R10_REG
)),
10656 gen_frame_mem (word_mode
, t
));
10659 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10661 /* If we havn't already set up the frame pointer, do so now. */
10662 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10664 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10665 GEN_INT (frame
.stack_pointer_offset
10666 - frame
.hard_frame_pointer_offset
));
10667 insn
= emit_insn (insn
);
10668 RTX_FRAME_RELATED_P (insn
) = 1;
10669 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10671 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10672 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10673 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10674 m
->fs
.fp_valid
= true;
10677 if (!int_registers_saved
)
10678 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10679 if (!sse_registers_saved
)
10680 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10682 pic_reg_used
= false;
10683 if (pic_offset_table_rtx
10684 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10687 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10689 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10690 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10692 pic_reg_used
= true;
10699 if (ix86_cmodel
== CM_LARGE_PIC
)
10701 rtx label
, tmp_reg
;
10703 gcc_assert (Pmode
== DImode
);
10704 label
= gen_label_rtx ();
10705 emit_label (label
);
10706 LABEL_PRESERVE_P (label
) = 1;
10707 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10708 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10709 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10711 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10712 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10713 pic_offset_table_rtx
, tmp_reg
));
10716 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10720 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10721 RTX_FRAME_RELATED_P (insn
) = 1;
10722 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10726 /* In the pic_reg_used case, make sure that the got load isn't deleted
10727 when mcount needs it. Blockage to avoid call movement across mcount
10728 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10730 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10731 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10733 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10735 /* vDRAP is setup but after reload it turns out stack realign
10736 isn't necessary, here we will emit prologue to setup DRAP
10737 without stack realign adjustment */
10738 t
= choose_baseaddr (0);
10739 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10742 /* Prevent instructions from being scheduled into register save push
10743 sequence when access to the redzone area is done through frame pointer.
10744 The offset between the frame pointer and the stack pointer is calculated
10745 relative to the value of the stack pointer at the end of the function
10746 prologue, and moving instructions that access redzone area via frame
10747 pointer inside push sequence violates this assumption. */
10748 if (frame_pointer_needed
&& frame
.red_zone_size
)
10749 emit_insn (gen_memory_blockage ());
10751 /* Emit cld instruction if stringops are used in the function. */
10752 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10753 emit_insn (gen_cld ());
10755 /* SEH requires that the prologue end within 256 bytes of the start of
10756 the function. Prevent instruction schedules that would extend that.
10757 Further, prevent alloca modifications to the stack pointer from being
10758 combined with prologue modifications. */
10760 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10763 /* Emit code to restore REG using a POP insn. */
10766 ix86_emit_restore_reg_using_pop (rtx reg
)
10768 struct machine_function
*m
= cfun
->machine
;
10769 rtx insn
= emit_insn (gen_pop (reg
));
10771 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10772 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10774 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10775 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10777 /* Previously we'd represented the CFA as an expression
10778 like *(%ebp - 8). We've just popped that value from
10779 the stack, which means we need to reset the CFA to
10780 the drap register. This will remain until we restore
10781 the stack pointer. */
10782 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10783 RTX_FRAME_RELATED_P (insn
) = 1;
10785 /* This means that the DRAP register is valid for addressing too. */
10786 m
->fs
.drap_valid
= true;
10790 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10792 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10793 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10794 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10795 RTX_FRAME_RELATED_P (insn
) = 1;
10797 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10800 /* When the frame pointer is the CFA, and we pop it, we are
10801 swapping back to the stack pointer as the CFA. This happens
10802 for stack frames that don't allocate other data, so we assume
10803 the stack pointer is now pointing at the return address, i.e.
10804 the function entry state, which makes the offset be 1 word. */
10805 if (reg
== hard_frame_pointer_rtx
)
10807 m
->fs
.fp_valid
= false;
10808 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10810 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10811 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10813 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10814 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10815 GEN_INT (m
->fs
.cfa_offset
)));
10816 RTX_FRAME_RELATED_P (insn
) = 1;
10821 /* Emit code to restore saved registers using POP insns. */
10824 ix86_emit_restore_regs_using_pop (void)
10826 unsigned int regno
;
10828 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10829 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10830 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10833 /* Emit code and notes for the LEAVE instruction. */
10836 ix86_emit_leave (void)
10838 struct machine_function
*m
= cfun
->machine
;
10839 rtx insn
= emit_insn (ix86_gen_leave ());
10841 ix86_add_queued_cfa_restore_notes (insn
);
10843 gcc_assert (m
->fs
.fp_valid
);
10844 m
->fs
.sp_valid
= true;
10845 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10846 m
->fs
.fp_valid
= false;
10848 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10850 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10851 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10853 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10854 plus_constant (Pmode
, stack_pointer_rtx
,
10856 RTX_FRAME_RELATED_P (insn
) = 1;
10858 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10862 /* Emit code to restore saved registers using MOV insns.
10863 First register is restored from CFA - CFA_OFFSET. */
10865 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10866 bool maybe_eh_return
)
10868 struct machine_function
*m
= cfun
->machine
;
10869 unsigned int regno
;
10871 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10872 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10874 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10877 mem
= choose_baseaddr (cfa_offset
);
10878 mem
= gen_frame_mem (word_mode
, mem
);
10879 insn
= emit_move_insn (reg
, mem
);
10881 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10883 /* Previously we'd represented the CFA as an expression
10884 like *(%ebp - 8). We've just popped that value from
10885 the stack, which means we need to reset the CFA to
10886 the drap register. This will remain until we restore
10887 the stack pointer. */
10888 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10889 RTX_FRAME_RELATED_P (insn
) = 1;
10891 /* This means that the DRAP register is valid for addressing. */
10892 m
->fs
.drap_valid
= true;
10895 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10897 cfa_offset
-= UNITS_PER_WORD
;
10901 /* Emit code to restore saved registers using MOV insns.
10902 First register is restored from CFA - CFA_OFFSET. */
10904 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10905 bool maybe_eh_return
)
10907 unsigned int regno
;
10909 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10910 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10912 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10915 mem
= choose_baseaddr (cfa_offset
);
10916 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10917 set_mem_align (mem
, 128);
10918 emit_move_insn (reg
, mem
);
10920 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10926 /* Emit vzeroupper if needed. */
10929 ix86_maybe_emit_epilogue_vzeroupper (void)
10931 if (TARGET_VZEROUPPER
10932 && !TREE_THIS_VOLATILE (cfun
->decl
)
10933 && !cfun
->machine
->caller_return_avx256_p
)
10934 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
10937 /* Restore function stack, frame, and registers. */
10940 ix86_expand_epilogue (int style
)
10942 struct machine_function
*m
= cfun
->machine
;
10943 struct machine_frame_state frame_state_save
= m
->fs
;
10944 struct ix86_frame frame
;
10945 bool restore_regs_via_mov
;
10948 ix86_finalize_stack_realign_flags ();
10949 ix86_compute_frame_layout (&frame
);
10951 m
->fs
.sp_valid
= (!frame_pointer_needed
10952 || (crtl
->sp_is_unchanging
10953 && !stack_realign_fp
));
10954 gcc_assert (!m
->fs
.sp_valid
10955 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10957 /* The FP must be valid if the frame pointer is present. */
10958 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10959 gcc_assert (!m
->fs
.fp_valid
10960 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10962 /* We must have *some* valid pointer to the stack frame. */
10963 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10965 /* The DRAP is never valid at this point. */
10966 gcc_assert (!m
->fs
.drap_valid
);
10968 /* See the comment about red zone and frame
10969 pointer usage in ix86_expand_prologue. */
10970 if (frame_pointer_needed
&& frame
.red_zone_size
)
10971 emit_insn (gen_memory_blockage ());
10973 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10974 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10976 /* Determine the CFA offset of the end of the red-zone. */
10977 m
->fs
.red_zone_offset
= 0;
10978 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10980 /* The red-zone begins below the return address. */
10981 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10983 /* When the register save area is in the aligned portion of
10984 the stack, determine the maximum runtime displacement that
10985 matches up with the aligned frame. */
10986 if (stack_realign_drap
)
10987 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10991 /* Special care must be taken for the normal return case of a function
10992 using eh_return: the eax and edx registers are marked as saved, but
10993 not restored along this path. Adjust the save location to match. */
10994 if (crtl
->calls_eh_return
&& style
!= 2)
10995 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10997 /* EH_RETURN requires the use of moves to function properly. */
10998 if (crtl
->calls_eh_return
)
10999 restore_regs_via_mov
= true;
11000 /* SEH requires the use of pops to identify the epilogue. */
11001 else if (TARGET_SEH
)
11002 restore_regs_via_mov
= false;
11003 /* If we're only restoring one register and sp is not valid then
11004 using a move instruction to restore the register since it's
11005 less work than reloading sp and popping the register. */
11006 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11007 restore_regs_via_mov
= true;
11008 else if (TARGET_EPILOGUE_USING_MOVE
11009 && cfun
->machine
->use_fast_prologue_epilogue
11010 && (frame
.nregs
> 1
11011 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11012 restore_regs_via_mov
= true;
11013 else if (frame_pointer_needed
11015 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11016 restore_regs_via_mov
= true;
11017 else if (frame_pointer_needed
11018 && TARGET_USE_LEAVE
11019 && cfun
->machine
->use_fast_prologue_epilogue
11020 && frame
.nregs
== 1)
11021 restore_regs_via_mov
= true;
11023 restore_regs_via_mov
= false;
11025 if (restore_regs_via_mov
|| frame
.nsseregs
)
11027 /* Ensure that the entire register save area is addressable via
11028 the stack pointer, if we will restore via sp. */
11030 && m
->fs
.sp_offset
> 0x7fffffff
11031 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11032 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11034 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11035 GEN_INT (m
->fs
.sp_offset
11036 - frame
.sse_reg_save_offset
),
11038 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11042 /* If there are any SSE registers to restore, then we have to do it
11043 via moves, since there's obviously no pop for SSE regs. */
11044 if (frame
.nsseregs
)
11045 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11048 if (restore_regs_via_mov
)
11053 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11055 /* eh_return epilogues need %ecx added to the stack pointer. */
11058 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11060 /* Stack align doesn't work with eh_return. */
11061 gcc_assert (!stack_realign_drap
);
11062 /* Neither does regparm nested functions. */
11063 gcc_assert (!ix86_static_chain_on_stack
);
11065 if (frame_pointer_needed
)
11067 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11068 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11069 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11071 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11072 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11074 /* Note that we use SA as a temporary CFA, as the return
11075 address is at the proper place relative to it. We
11076 pretend this happens at the FP restore insn because
11077 prior to this insn the FP would be stored at the wrong
11078 offset relative to SA, and after this insn we have no
11079 other reasonable register to use for the CFA. We don't
11080 bother resetting the CFA to the SP for the duration of
11081 the return insn. */
11082 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11083 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11084 ix86_add_queued_cfa_restore_notes (insn
);
11085 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11086 RTX_FRAME_RELATED_P (insn
) = 1;
11088 m
->fs
.cfa_reg
= sa
;
11089 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11090 m
->fs
.fp_valid
= false;
11092 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11093 const0_rtx
, style
, false);
11097 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11098 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11099 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11100 ix86_add_queued_cfa_restore_notes (insn
);
11102 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11103 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11105 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11106 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11107 plus_constant (Pmode
, stack_pointer_rtx
,
11109 RTX_FRAME_RELATED_P (insn
) = 1;
11112 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11113 m
->fs
.sp_valid
= true;
11118 /* SEH requires that the function end with (1) a stack adjustment
11119 if necessary, (2) a sequence of pops, and (3) a return or
11120 jump instruction. Prevent insns from the function body from
11121 being scheduled into this sequence. */
11124 /* Prevent a catch region from being adjacent to the standard
11125 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11126 several other flags that would be interesting to test are
11128 if (flag_non_call_exceptions
)
11129 emit_insn (gen_nops (const1_rtx
));
11131 emit_insn (gen_blockage ());
11134 /* First step is to deallocate the stack frame so that we can
11135 pop the registers. Also do it on SEH target for very large
11136 frame as the emitted instructions aren't allowed by the ABI in
11138 if (!m
->fs
.sp_valid
11140 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11141 >= SEH_MAX_FRAME_SIZE
)))
11143 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11144 GEN_INT (m
->fs
.fp_offset
11145 - frame
.reg_save_offset
),
11148 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11150 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11151 GEN_INT (m
->fs
.sp_offset
11152 - frame
.reg_save_offset
),
11154 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11157 ix86_emit_restore_regs_using_pop ();
11160 /* If we used a stack pointer and haven't already got rid of it,
11162 if (m
->fs
.fp_valid
)
11164 /* If the stack pointer is valid and pointing at the frame
11165 pointer store address, then we only need a pop. */
11166 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11167 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11168 /* Leave results in shorter dependency chains on CPUs that are
11169 able to grok it fast. */
11170 else if (TARGET_USE_LEAVE
11171 || optimize_function_for_size_p (cfun
)
11172 || !cfun
->machine
->use_fast_prologue_epilogue
)
11173 ix86_emit_leave ();
11176 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11177 hard_frame_pointer_rtx
,
11178 const0_rtx
, style
, !using_drap
);
11179 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11185 int param_ptr_offset
= UNITS_PER_WORD
;
11188 gcc_assert (stack_realign_drap
);
11190 if (ix86_static_chain_on_stack
)
11191 param_ptr_offset
+= UNITS_PER_WORD
;
11192 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11193 param_ptr_offset
+= UNITS_PER_WORD
;
11195 insn
= emit_insn (gen_rtx_SET
11196 (VOIDmode
, stack_pointer_rtx
,
11197 gen_rtx_PLUS (Pmode
,
11199 GEN_INT (-param_ptr_offset
))));
11200 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11201 m
->fs
.cfa_offset
= param_ptr_offset
;
11202 m
->fs
.sp_offset
= param_ptr_offset
;
11203 m
->fs
.realigned
= false;
11205 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11206 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11207 GEN_INT (param_ptr_offset
)));
11208 RTX_FRAME_RELATED_P (insn
) = 1;
11210 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11211 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11214 /* At this point the stack pointer must be valid, and we must have
11215 restored all of the registers. We may not have deallocated the
11216 entire stack frame. We've delayed this until now because it may
11217 be possible to merge the local stack deallocation with the
11218 deallocation forced by ix86_static_chain_on_stack. */
11219 gcc_assert (m
->fs
.sp_valid
);
11220 gcc_assert (!m
->fs
.fp_valid
);
11221 gcc_assert (!m
->fs
.realigned
);
11222 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11224 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11225 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11229 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11231 /* Sibcall epilogues don't want a return instruction. */
11234 m
->fs
= frame_state_save
;
11238 /* Emit vzeroupper if needed. */
11239 ix86_maybe_emit_epilogue_vzeroupper ();
11241 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11243 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11245 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11246 address, do explicit add, and jump indirectly to the caller. */
11248 if (crtl
->args
.pops_args
>= 65536)
11250 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11253 /* There is no "pascal" calling convention in any 64bit ABI. */
11254 gcc_assert (!TARGET_64BIT
);
11256 insn
= emit_insn (gen_pop (ecx
));
11257 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11258 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11260 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11261 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11262 add_reg_note (insn
, REG_CFA_REGISTER
,
11263 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11264 RTX_FRAME_RELATED_P (insn
) = 1;
11266 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11268 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11271 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11274 emit_jump_insn (gen_simple_return_internal ());
11276 /* Restore the state back to the state from the prologue,
11277 so that it's correct for the next epilogue. */
11278 m
->fs
= frame_state_save
;
11281 /* Reset from the function's potential modifications. */
11284 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11285 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11287 if (pic_offset_table_rtx
)
11288 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11290 /* Mach-O doesn't support labels at the end of objects, so if
11291 it looks like we might want one, insert a NOP. */
11293 rtx insn
= get_last_insn ();
11294 rtx deleted_debug_label
= NULL_RTX
;
11297 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11299 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11300 notes only, instead set their CODE_LABEL_NUMBER to -1,
11301 otherwise there would be code generation differences
11302 in between -g and -g0. */
11303 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11304 deleted_debug_label
= insn
;
11305 insn
= PREV_INSN (insn
);
11310 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11311 fputs ("\tnop\n", file
);
11312 else if (deleted_debug_label
)
11313 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11314 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11315 CODE_LABEL_NUMBER (insn
) = -1;
11321 /* Return a scratch register to use in the split stack prologue. The
11322 split stack prologue is used for -fsplit-stack. It is the first
11323 instructions in the function, even before the regular prologue.
11324 The scratch register can be any caller-saved register which is not
11325 used for parameters or for the static chain. */
11327 static unsigned int
11328 split_stack_prologue_scratch_regno (void)
11337 is_fastcall
= (lookup_attribute ("fastcall",
11338 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11340 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11344 if (DECL_STATIC_CHAIN (cfun
->decl
))
11346 sorry ("-fsplit-stack does not support fastcall with "
11347 "nested function");
11348 return INVALID_REGNUM
;
11352 else if (regparm
< 3)
11354 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11360 sorry ("-fsplit-stack does not support 2 register "
11361 " parameters for a nested function");
11362 return INVALID_REGNUM
;
11369 /* FIXME: We could make this work by pushing a register
11370 around the addition and comparison. */
11371 sorry ("-fsplit-stack does not support 3 register parameters");
11372 return INVALID_REGNUM
;
11377 /* A SYMBOL_REF for the function which allocates new stackspace for
11380 static GTY(()) rtx split_stack_fn
;
11382 /* A SYMBOL_REF for the more stack function when using the large
11385 static GTY(()) rtx split_stack_fn_large
;
11387 /* Handle -fsplit-stack. These are the first instructions in the
11388 function, even before the regular prologue. */
11391 ix86_expand_split_stack_prologue (void)
11393 struct ix86_frame frame
;
11394 HOST_WIDE_INT allocate
;
11395 unsigned HOST_WIDE_INT args_size
;
11396 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11397 rtx scratch_reg
= NULL_RTX
;
11398 rtx varargs_label
= NULL_RTX
;
11401 gcc_assert (flag_split_stack
&& reload_completed
);
11403 ix86_finalize_stack_realign_flags ();
11404 ix86_compute_frame_layout (&frame
);
11405 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11407 /* This is the label we will branch to if we have enough stack
11408 space. We expect the basic block reordering pass to reverse this
11409 branch if optimizing, so that we branch in the unlikely case. */
11410 label
= gen_label_rtx ();
11412 /* We need to compare the stack pointer minus the frame size with
11413 the stack boundary in the TCB. The stack boundary always gives
11414 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11415 can compare directly. Otherwise we need to do an addition. */
11417 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11418 UNSPEC_STACK_CHECK
);
11419 limit
= gen_rtx_CONST (Pmode
, limit
);
11420 limit
= gen_rtx_MEM (Pmode
, limit
);
11421 if (allocate
< SPLIT_STACK_AVAILABLE
)
11422 current
= stack_pointer_rtx
;
11425 unsigned int scratch_regno
;
11428 /* We need a scratch register to hold the stack pointer minus
11429 the required frame size. Since this is the very start of the
11430 function, the scratch register can be any caller-saved
11431 register which is not used for parameters. */
11432 offset
= GEN_INT (- allocate
);
11433 scratch_regno
= split_stack_prologue_scratch_regno ();
11434 if (scratch_regno
== INVALID_REGNUM
)
11436 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11437 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11439 /* We don't use ix86_gen_add3 in this case because it will
11440 want to split to lea, but when not optimizing the insn
11441 will not be split after this point. */
11442 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11443 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11448 emit_move_insn (scratch_reg
, offset
);
11449 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11450 stack_pointer_rtx
));
11452 current
= scratch_reg
;
11455 ix86_expand_branch (GEU
, current
, limit
, label
);
11456 jump_insn
= get_last_insn ();
11457 JUMP_LABEL (jump_insn
) = label
;
11459 /* Mark the jump as very likely to be taken. */
11460 add_reg_note (jump_insn
, REG_BR_PROB
,
11461 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11463 if (split_stack_fn
== NULL_RTX
)
11464 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11465 fn
= split_stack_fn
;
11467 /* Get more stack space. We pass in the desired stack space and the
11468 size of the arguments to copy to the new stack. In 32-bit mode
11469 we push the parameters; __morestack will return on a new stack
11470 anyhow. In 64-bit mode we pass the parameters in r10 and
11472 allocate_rtx
= GEN_INT (allocate
);
11473 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11474 call_fusage
= NULL_RTX
;
11479 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11480 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11482 /* If this function uses a static chain, it will be in %r10.
11483 Preserve it across the call to __morestack. */
11484 if (DECL_STATIC_CHAIN (cfun
->decl
))
11488 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11489 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11490 use_reg (&call_fusage
, rax
);
11493 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11495 HOST_WIDE_INT argval
;
11497 gcc_assert (Pmode
== DImode
);
11498 /* When using the large model we need to load the address
11499 into a register, and we've run out of registers. So we
11500 switch to a different calling convention, and we call a
11501 different function: __morestack_large. We pass the
11502 argument size in the upper 32 bits of r10 and pass the
11503 frame size in the lower 32 bits. */
11504 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11505 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11507 if (split_stack_fn_large
== NULL_RTX
)
11508 split_stack_fn_large
=
11509 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11511 if (ix86_cmodel
== CM_LARGE_PIC
)
11515 label
= gen_label_rtx ();
11516 emit_label (label
);
11517 LABEL_PRESERVE_P (label
) = 1;
11518 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11519 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11520 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11521 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11523 x
= gen_rtx_CONST (Pmode
, x
);
11524 emit_move_insn (reg11
, x
);
11525 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11526 x
= gen_const_mem (Pmode
, x
);
11527 emit_move_insn (reg11
, x
);
11530 emit_move_insn (reg11
, split_stack_fn_large
);
11534 argval
= ((args_size
<< 16) << 16) + allocate
;
11535 emit_move_insn (reg10
, GEN_INT (argval
));
11539 emit_move_insn (reg10
, allocate_rtx
);
11540 emit_move_insn (reg11
, GEN_INT (args_size
));
11541 use_reg (&call_fusage
, reg11
);
11544 use_reg (&call_fusage
, reg10
);
11548 emit_insn (gen_push (GEN_INT (args_size
)));
11549 emit_insn (gen_push (allocate_rtx
));
11551 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11552 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11554 add_function_usage_to (call_insn
, call_fusage
);
11556 /* In order to make call/return prediction work right, we now need
11557 to execute a return instruction. See
11558 libgcc/config/i386/morestack.S for the details on how this works.
11560 For flow purposes gcc must not see this as a return
11561 instruction--we need control flow to continue at the subsequent
11562 label. Therefore, we use an unspec. */
11563 gcc_assert (crtl
->args
.pops_args
< 65536);
11564 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11566 /* If we are in 64-bit mode and this function uses a static chain,
11567 we saved %r10 in %rax before calling _morestack. */
11568 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11569 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11570 gen_rtx_REG (word_mode
, AX_REG
));
11572 /* If this function calls va_start, we need to store a pointer to
11573 the arguments on the old stack, because they may not have been
11574 all copied to the new stack. At this point the old stack can be
11575 found at the frame pointer value used by __morestack, because
11576 __morestack has set that up before calling back to us. Here we
11577 store that pointer in a scratch register, and in
11578 ix86_expand_prologue we store the scratch register in a stack
11580 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11582 unsigned int scratch_regno
;
11586 scratch_regno
= split_stack_prologue_scratch_regno ();
11587 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11588 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11592 return address within this function
11593 return address of caller of this function
11595 So we add three words to get to the stack arguments.
11599 return address within this function
11600 first argument to __morestack
11601 second argument to __morestack
11602 return address of caller of this function
11604 So we add five words to get to the stack arguments.
11606 words
= TARGET_64BIT
? 3 : 5;
11607 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11608 gen_rtx_PLUS (Pmode
, frame_reg
,
11609 GEN_INT (words
* UNITS_PER_WORD
))));
11611 varargs_label
= gen_label_rtx ();
11612 emit_jump_insn (gen_jump (varargs_label
));
11613 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11618 emit_label (label
);
11619 LABEL_NUSES (label
) = 1;
11621 /* If this function calls va_start, we now have to set the scratch
11622 register for the case where we do not call __morestack. In this
11623 case we need to set it based on the stack pointer. */
11624 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11626 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11627 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11628 GEN_INT (UNITS_PER_WORD
))));
11630 emit_label (varargs_label
);
11631 LABEL_NUSES (varargs_label
) = 1;
11635 /* We may have to tell the dataflow pass that the split stack prologue
11636 is initializing a scratch register. */
11639 ix86_live_on_entry (bitmap regs
)
11641 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11643 gcc_assert (flag_split_stack
);
11644 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11648 /* Determine if op is suitable SUBREG RTX for address. */
11651 ix86_address_subreg_operand (rtx op
)
11653 enum machine_mode mode
;
11658 mode
= GET_MODE (op
);
11660 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11663 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11664 failures when the register is one word out of a two word structure. */
11665 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11668 /* simplify_subreg does not handle stack pointer. */
11669 if (REGNO (op
) == STACK_POINTER_REGNUM
)
11672 /* Allow only SUBREGs of non-eliminable hard registers. */
11673 return register_no_elim_operand (op
, mode
);
11676 /* Extract the parts of an RTL expression that is a valid memory address
11677 for an instruction. Return 0 if the structure of the address is
11678 grossly off. Return -1 if the address contains ASHIFT, so it is not
11679 strictly valid, but still used for computing length of lea instruction. */
11682 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11684 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11685 rtx base_reg
, index_reg
;
11686 HOST_WIDE_INT scale
= 1;
11687 rtx scale_rtx
= NULL_RTX
;
11690 enum ix86_address_seg seg
= SEG_DEFAULT
;
11692 /* Allow zero-extended SImode addresses,
11693 they will be emitted with addr32 prefix. */
11694 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11696 if (GET_CODE (addr
) == ZERO_EXTEND
11697 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11699 addr
= XEXP (addr
, 0);
11700 if (CONST_INT_P (addr
))
11703 else if (GET_CODE (addr
) == AND
11704 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11706 addr
= XEXP (addr
, 0);
11708 /* Adjust SUBREGs. */
11709 if (GET_CODE (addr
) == SUBREG
11710 && GET_MODE (SUBREG_REG (addr
)) == SImode
)
11712 addr
= SUBREG_REG (addr
);
11713 if (CONST_INT_P (addr
))
11716 else if (GET_MODE (addr
) == DImode
)
11717 addr
= gen_rtx_SUBREG (SImode
, addr
, 0);
11718 else if (GET_MODE (addr
) != VOIDmode
)
11723 /* Allow SImode subregs of DImode addresses,
11724 they will be emitted with addr32 prefix. */
11725 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11727 if (GET_CODE (addr
) == SUBREG
11728 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11730 addr
= SUBREG_REG (addr
);
11731 if (CONST_INT_P (addr
))
11738 else if (GET_CODE (addr
) == SUBREG
)
11740 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11745 else if (GET_CODE (addr
) == PLUS
)
11747 rtx addends
[4], op
;
11755 addends
[n
++] = XEXP (op
, 1);
11758 while (GET_CODE (op
) == PLUS
);
11763 for (i
= n
; i
>= 0; --i
)
11766 switch (GET_CODE (op
))
11771 index
= XEXP (op
, 0);
11772 scale_rtx
= XEXP (op
, 1);
11778 index
= XEXP (op
, 0);
11779 tmp
= XEXP (op
, 1);
11780 if (!CONST_INT_P (tmp
))
11782 scale
= INTVAL (tmp
);
11783 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11785 scale
= 1 << scale
;
11790 if (GET_CODE (op
) != UNSPEC
)
11795 if (XINT (op
, 1) == UNSPEC_TP
11796 && TARGET_TLS_DIRECT_SEG_REFS
11797 && seg
== SEG_DEFAULT
)
11798 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11804 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11831 else if (GET_CODE (addr
) == MULT
)
11833 index
= XEXP (addr
, 0); /* index*scale */
11834 scale_rtx
= XEXP (addr
, 1);
11836 else if (GET_CODE (addr
) == ASHIFT
)
11838 /* We're called for lea too, which implements ashift on occasion. */
11839 index
= XEXP (addr
, 0);
11840 tmp
= XEXP (addr
, 1);
11841 if (!CONST_INT_P (tmp
))
11843 scale
= INTVAL (tmp
);
11844 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11846 scale
= 1 << scale
;
11849 else if (CONST_INT_P (addr
))
11851 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11854 /* Constant addresses are sign extended to 64bit, we have to
11855 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11857 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11863 disp
= addr
; /* displacement */
11869 else if (GET_CODE (index
) == SUBREG
11870 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11876 /* Address override works only on the (%reg) part of %fs:(%reg). */
11877 if (seg
!= SEG_DEFAULT
11878 && ((base
&& GET_MODE (base
) != word_mode
)
11879 || (index
&& GET_MODE (index
) != word_mode
)))
11882 /* Extract the integral value of scale. */
11885 if (!CONST_INT_P (scale_rtx
))
11887 scale
= INTVAL (scale_rtx
);
11890 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11891 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11893 /* Avoid useless 0 displacement. */
11894 if (disp
== const0_rtx
&& (base
|| index
))
11897 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11898 if (base_reg
&& index_reg
&& scale
== 1
11899 && (index_reg
== arg_pointer_rtx
11900 || index_reg
== frame_pointer_rtx
11901 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11904 tmp
= base
, base
= index
, index
= tmp
;
11905 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11908 /* Special case: %ebp cannot be encoded as a base without a displacement.
11912 && (base_reg
== hard_frame_pointer_rtx
11913 || base_reg
== frame_pointer_rtx
11914 || base_reg
== arg_pointer_rtx
11915 || (REG_P (base_reg
)
11916 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11917 || REGNO (base_reg
) == R13_REG
))))
11920 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11921 Avoid this by transforming to [%esi+0].
11922 Reload calls address legitimization without cfun defined, so we need
11923 to test cfun for being non-NULL. */
11924 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11925 && base_reg
&& !index_reg
&& !disp
11926 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11929 /* Special case: encode reg+reg instead of reg*2. */
11930 if (!base
&& index
&& scale
== 2)
11931 base
= index
, base_reg
= index_reg
, scale
= 1;
11933 /* Special case: scaling cannot be encoded without base or displacement. */
11934 if (!base
&& !disp
&& index
&& scale
!= 1)
11938 out
->index
= index
;
11940 out
->scale
= scale
;
11946 /* Return cost of the memory address x.
11947 For i386, it is better to use a complex address than let gcc copy
11948 the address into a reg and make a new pseudo. But not if the address
11949 requires to two regs - that would mean more pseudos with longer
11952 ix86_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
11954 struct ix86_address parts
;
11956 int ok
= ix86_decompose_address (x
, &parts
);
11960 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11961 parts
.base
= SUBREG_REG (parts
.base
);
11962 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11963 parts
.index
= SUBREG_REG (parts
.index
);
11965 /* Attempt to minimize number of registers in the address. */
11967 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11969 && (!REG_P (parts
.index
)
11970 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11974 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11976 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11977 && parts
.base
!= parts
.index
)
11980 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11981 since it's predecode logic can't detect the length of instructions
11982 and it degenerates to vector decoded. Increase cost of such
11983 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11984 to split such addresses or even refuse such addresses at all.
11986 Following addressing modes are affected:
11991 The first and last case may be avoidable by explicitly coding the zero in
11992 memory address, but I don't have AMD-K6 machine handy to check this
11996 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11997 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11998 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12004 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12005 this is used for to form addresses to local data when -fPIC is in
12009 darwin_local_data_pic (rtx disp
)
12011 return (GET_CODE (disp
) == UNSPEC
12012 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12015 /* Determine if a given RTX is a valid constant. We already know this
12016 satisfies CONSTANT_P. */
12019 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12021 switch (GET_CODE (x
))
12026 if (GET_CODE (x
) == PLUS
)
12028 if (!CONST_INT_P (XEXP (x
, 1)))
12033 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12036 /* Only some unspecs are valid as "constants". */
12037 if (GET_CODE (x
) == UNSPEC
)
12038 switch (XINT (x
, 1))
12041 case UNSPEC_GOTOFF
:
12042 case UNSPEC_PLTOFF
:
12043 return TARGET_64BIT
;
12045 case UNSPEC_NTPOFF
:
12046 x
= XVECEXP (x
, 0, 0);
12047 return (GET_CODE (x
) == SYMBOL_REF
12048 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12049 case UNSPEC_DTPOFF
:
12050 x
= XVECEXP (x
, 0, 0);
12051 return (GET_CODE (x
) == SYMBOL_REF
12052 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12057 /* We must have drilled down to a symbol. */
12058 if (GET_CODE (x
) == LABEL_REF
)
12060 if (GET_CODE (x
) != SYMBOL_REF
)
12065 /* TLS symbols are never valid. */
12066 if (SYMBOL_REF_TLS_MODEL (x
))
12069 /* DLLIMPORT symbols are never valid. */
12070 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12071 && SYMBOL_REF_DLLIMPORT_P (x
))
12075 /* mdynamic-no-pic */
12076 if (MACHO_DYNAMIC_NO_PIC_P
)
12077 return machopic_symbol_defined_p (x
);
12082 if (GET_MODE (x
) == TImode
12083 && x
!= CONST0_RTX (TImode
)
12089 if (!standard_sse_constant_p (x
))
12096 /* Otherwise we handle everything else in the move patterns. */
12100 /* Determine if it's legal to put X into the constant pool. This
12101 is not possible for the address of thread-local symbols, which
12102 is checked above. */
12105 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12107 /* We can always put integral constants and vectors in memory. */
12108 switch (GET_CODE (x
))
12118 return !ix86_legitimate_constant_p (mode
, x
);
12122 /* Nonzero if the constant value X is a legitimate general operand
12123 when generating PIC code. It is given that flag_pic is on and
12124 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12127 legitimate_pic_operand_p (rtx x
)
12131 switch (GET_CODE (x
))
12134 inner
= XEXP (x
, 0);
12135 if (GET_CODE (inner
) == PLUS
12136 && CONST_INT_P (XEXP (inner
, 1)))
12137 inner
= XEXP (inner
, 0);
12139 /* Only some unspecs are valid as "constants". */
12140 if (GET_CODE (inner
) == UNSPEC
)
12141 switch (XINT (inner
, 1))
12144 case UNSPEC_GOTOFF
:
12145 case UNSPEC_PLTOFF
:
12146 return TARGET_64BIT
;
12148 x
= XVECEXP (inner
, 0, 0);
12149 return (GET_CODE (x
) == SYMBOL_REF
12150 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12151 case UNSPEC_MACHOPIC_OFFSET
:
12152 return legitimate_pic_address_disp_p (x
);
12160 return legitimate_pic_address_disp_p (x
);
12167 /* Determine if a given CONST RTX is a valid memory displacement
12171 legitimate_pic_address_disp_p (rtx disp
)
12175 /* In 64bit mode we can allow direct addresses of symbols and labels
12176 when they are not dynamic symbols. */
12179 rtx op0
= disp
, op1
;
12181 switch (GET_CODE (disp
))
12187 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12189 op0
= XEXP (XEXP (disp
, 0), 0);
12190 op1
= XEXP (XEXP (disp
, 0), 1);
12191 if (!CONST_INT_P (op1
)
12192 || INTVAL (op1
) >= 16*1024*1024
12193 || INTVAL (op1
) < -16*1024*1024)
12195 if (GET_CODE (op0
) == LABEL_REF
)
12197 if (GET_CODE (op0
) == CONST
12198 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12199 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12201 if (GET_CODE (op0
) == UNSPEC
12202 && XINT (op0
, 1) == UNSPEC_PCREL
)
12204 if (GET_CODE (op0
) != SYMBOL_REF
)
12209 /* TLS references should always be enclosed in UNSPEC. */
12210 if (SYMBOL_REF_TLS_MODEL (op0
))
12212 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12213 && ix86_cmodel
!= CM_LARGE_PIC
)
12221 if (GET_CODE (disp
) != CONST
)
12223 disp
= XEXP (disp
, 0);
12227 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12228 of GOT tables. We should not need these anyway. */
12229 if (GET_CODE (disp
) != UNSPEC
12230 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12231 && XINT (disp
, 1) != UNSPEC_GOTOFF
12232 && XINT (disp
, 1) != UNSPEC_PCREL
12233 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12236 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12237 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12243 if (GET_CODE (disp
) == PLUS
)
12245 if (!CONST_INT_P (XEXP (disp
, 1)))
12247 disp
= XEXP (disp
, 0);
12251 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12254 if (GET_CODE (disp
) != UNSPEC
)
12257 switch (XINT (disp
, 1))
12262 /* We need to check for both symbols and labels because VxWorks loads
12263 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12265 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12266 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12267 case UNSPEC_GOTOFF
:
12268 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12269 While ABI specify also 32bit relocation but we don't produce it in
12270 small PIC model at all. */
12271 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12272 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12274 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12276 case UNSPEC_GOTTPOFF
:
12277 case UNSPEC_GOTNTPOFF
:
12278 case UNSPEC_INDNTPOFF
:
12281 disp
= XVECEXP (disp
, 0, 0);
12282 return (GET_CODE (disp
) == SYMBOL_REF
12283 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12284 case UNSPEC_NTPOFF
:
12285 disp
= XVECEXP (disp
, 0, 0);
12286 return (GET_CODE (disp
) == SYMBOL_REF
12287 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12288 case UNSPEC_DTPOFF
:
12289 disp
= XVECEXP (disp
, 0, 0);
12290 return (GET_CODE (disp
) == SYMBOL_REF
12291 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12297 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12298 replace the input X, or the original X if no replacement is called for.
12299 The output parameter *WIN is 1 if the calling macro should goto WIN,
12300 0 if it should not. */
12303 ix86_legitimize_reload_address (rtx x
,
12304 enum machine_mode mode ATTRIBUTE_UNUSED
,
12305 int opnum
, int type
,
12306 int ind_levels ATTRIBUTE_UNUSED
)
12308 /* Reload can generate:
12310 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12314 This RTX is rejected from ix86_legitimate_address_p due to
12315 non-strictness of base register 97. Following this rejection,
12316 reload pushes all three components into separate registers,
12317 creating invalid memory address RTX.
12319 Following code reloads only the invalid part of the
12320 memory address RTX. */
12322 if (GET_CODE (x
) == PLUS
12323 && REG_P (XEXP (x
, 1))
12324 && GET_CODE (XEXP (x
, 0)) == PLUS
12325 && REG_P (XEXP (XEXP (x
, 0), 1)))
12328 bool something_reloaded
= false;
12330 base
= XEXP (XEXP (x
, 0), 1);
12331 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12333 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12334 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12335 opnum
, (enum reload_type
) type
);
12336 something_reloaded
= true;
12339 index
= XEXP (x
, 1);
12340 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12342 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12343 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12344 opnum
, (enum reload_type
) type
);
12345 something_reloaded
= true;
12348 gcc_assert (something_reloaded
);
12355 /* Recognizes RTL expressions that are valid memory addresses for an
12356 instruction. The MODE argument is the machine mode for the MEM
12357 expression that wants to use this address.
12359 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12360 convert common non-canonical forms to canonical form so that they will
12364 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12365 rtx addr
, bool strict
)
12367 struct ix86_address parts
;
12368 rtx base
, index
, disp
;
12369 HOST_WIDE_INT scale
;
12371 if (ix86_decompose_address (addr
, &parts
) <= 0)
12372 /* Decomposition failed. */
12376 index
= parts
.index
;
12378 scale
= parts
.scale
;
12380 /* Validate base register. */
12387 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12388 reg
= SUBREG_REG (base
);
12390 /* Base is not a register. */
12393 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12396 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12397 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12398 /* Base is not valid. */
12402 /* Validate index register. */
12409 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12410 reg
= SUBREG_REG (index
);
12412 /* Index is not a register. */
12415 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12418 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12419 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12420 /* Index is not valid. */
12424 /* Index and base should have the same mode. */
12426 && GET_MODE (base
) != GET_MODE (index
))
12429 /* Validate scale factor. */
12433 /* Scale without index. */
12436 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12437 /* Scale is not a valid multiplier. */
12441 /* Validate displacement. */
12444 if (GET_CODE (disp
) == CONST
12445 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12446 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12447 switch (XINT (XEXP (disp
, 0), 1))
12449 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12450 used. While ABI specify also 32bit relocations, we don't produce
12451 them at all and use IP relative instead. */
12453 case UNSPEC_GOTOFF
:
12454 gcc_assert (flag_pic
);
12456 goto is_legitimate_pic
;
12458 /* 64bit address unspec. */
12461 case UNSPEC_GOTPCREL
:
12463 gcc_assert (flag_pic
);
12464 goto is_legitimate_pic
;
12466 case UNSPEC_GOTTPOFF
:
12467 case UNSPEC_GOTNTPOFF
:
12468 case UNSPEC_INDNTPOFF
:
12469 case UNSPEC_NTPOFF
:
12470 case UNSPEC_DTPOFF
:
12473 case UNSPEC_STACK_CHECK
:
12474 gcc_assert (flag_split_stack
);
12478 /* Invalid address unspec. */
12482 else if (SYMBOLIC_CONST (disp
)
12486 && MACHOPIC_INDIRECT
12487 && !machopic_operand_p (disp
)
12493 if (TARGET_64BIT
&& (index
|| base
))
12495 /* foo@dtpoff(%rX) is ok. */
12496 if (GET_CODE (disp
) != CONST
12497 || GET_CODE (XEXP (disp
, 0)) != PLUS
12498 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12499 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12500 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12501 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12502 /* Non-constant pic memory reference. */
12505 else if ((!TARGET_MACHO
|| flag_pic
)
12506 && ! legitimate_pic_address_disp_p (disp
))
12507 /* Displacement is an invalid pic construct. */
12510 else if (MACHO_DYNAMIC_NO_PIC_P
12511 && !ix86_legitimate_constant_p (Pmode
, disp
))
12512 /* displacment must be referenced via non_lazy_pointer */
12516 /* This code used to verify that a symbolic pic displacement
12517 includes the pic_offset_table_rtx register.
12519 While this is good idea, unfortunately these constructs may
12520 be created by "adds using lea" optimization for incorrect
12529 This code is nonsensical, but results in addressing
12530 GOT table with pic_offset_table_rtx base. We can't
12531 just refuse it easily, since it gets matched by
12532 "addsi3" pattern, that later gets split to lea in the
12533 case output register differs from input. While this
12534 can be handled by separate addsi pattern for this case
12535 that never results in lea, this seems to be easier and
12536 correct fix for crash to disable this test. */
12538 else if (GET_CODE (disp
) != LABEL_REF
12539 && !CONST_INT_P (disp
)
12540 && (GET_CODE (disp
) != CONST
12541 || !ix86_legitimate_constant_p (Pmode
, disp
))
12542 && (GET_CODE (disp
) != SYMBOL_REF
12543 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12544 /* Displacement is not constant. */
12546 else if (TARGET_64BIT
12547 && !x86_64_immediate_operand (disp
, VOIDmode
))
12548 /* Displacement is out of range. */
12552 /* Everything looks valid. */
12556 /* Determine if a given RTX is a valid constant address. */
12559 constant_address_p (rtx x
)
12561 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12564 /* Return a unique alias set for the GOT. */
12566 static alias_set_type
12567 ix86_GOT_alias_set (void)
12569 static alias_set_type set
= -1;
12571 set
= new_alias_set ();
12575 /* Return a legitimate reference for ORIG (an address) using the
12576 register REG. If REG is 0, a new pseudo is generated.
12578 There are two types of references that must be handled:
12580 1. Global data references must load the address from the GOT, via
12581 the PIC reg. An insn is emitted to do this load, and the reg is
12584 2. Static data references, constant pool addresses, and code labels
12585 compute the address as an offset from the GOT, whose base is in
12586 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12587 differentiate them from global data objects. The returned
12588 address is the PIC reg + an unspec constant.
12590 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12591 reg also appears in the address. */
12594 legitimize_pic_address (rtx orig
, rtx reg
)
12597 rtx new_rtx
= orig
;
12601 if (TARGET_MACHO
&& !TARGET_64BIT
)
12604 reg
= gen_reg_rtx (Pmode
);
12605 /* Use the generic Mach-O PIC machinery. */
12606 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12610 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12612 else if (TARGET_64BIT
12613 && ix86_cmodel
!= CM_SMALL_PIC
12614 && gotoff_operand (addr
, Pmode
))
12617 /* This symbol may be referenced via a displacement from the PIC
12618 base address (@GOTOFF). */
12620 if (reload_in_progress
)
12621 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12622 if (GET_CODE (addr
) == CONST
)
12623 addr
= XEXP (addr
, 0);
12624 if (GET_CODE (addr
) == PLUS
)
12626 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12628 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12631 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12632 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12634 tmpreg
= gen_reg_rtx (Pmode
);
12637 emit_move_insn (tmpreg
, new_rtx
);
12641 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12642 tmpreg
, 1, OPTAB_DIRECT
);
12645 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12647 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12649 /* This symbol may be referenced via a displacement from the PIC
12650 base address (@GOTOFF). */
12652 if (reload_in_progress
)
12653 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12654 if (GET_CODE (addr
) == CONST
)
12655 addr
= XEXP (addr
, 0);
12656 if (GET_CODE (addr
) == PLUS
)
12658 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12660 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12663 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12664 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12665 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12669 emit_move_insn (reg
, new_rtx
);
12673 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12674 /* We can't use @GOTOFF for text labels on VxWorks;
12675 see gotoff_operand. */
12676 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12678 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12680 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12681 return legitimize_dllimport_symbol (addr
, true);
12682 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12683 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12684 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12686 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12687 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12691 /* For x64 PE-COFF there is no GOT table. So we use address
12693 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12695 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12696 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12699 reg
= gen_reg_rtx (Pmode
);
12700 emit_move_insn (reg
, new_rtx
);
12703 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12705 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12706 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12707 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12708 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12711 reg
= gen_reg_rtx (Pmode
);
12712 /* Use directly gen_movsi, otherwise the address is loaded
12713 into register for CSE. We don't want to CSE this addresses,
12714 instead we CSE addresses from the GOT table, so skip this. */
12715 emit_insn (gen_movsi (reg
, new_rtx
));
12720 /* This symbol must be referenced via a load from the
12721 Global Offset Table (@GOT). */
12723 if (reload_in_progress
)
12724 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12725 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12726 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12728 new_rtx
= force_reg (Pmode
, new_rtx
);
12729 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12730 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12731 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12734 reg
= gen_reg_rtx (Pmode
);
12735 emit_move_insn (reg
, new_rtx
);
12741 if (CONST_INT_P (addr
)
12742 && !x86_64_immediate_operand (addr
, VOIDmode
))
12746 emit_move_insn (reg
, addr
);
12750 new_rtx
= force_reg (Pmode
, addr
);
12752 else if (GET_CODE (addr
) == CONST
)
12754 addr
= XEXP (addr
, 0);
12756 /* We must match stuff we generate before. Assume the only
12757 unspecs that can get here are ours. Not that we could do
12758 anything with them anyway.... */
12759 if (GET_CODE (addr
) == UNSPEC
12760 || (GET_CODE (addr
) == PLUS
12761 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12763 gcc_assert (GET_CODE (addr
) == PLUS
);
12765 if (GET_CODE (addr
) == PLUS
)
12767 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12769 /* Check first to see if this is a constant offset from a @GOTOFF
12770 symbol reference. */
12771 if (gotoff_operand (op0
, Pmode
)
12772 && CONST_INT_P (op1
))
12776 if (reload_in_progress
)
12777 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12778 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12780 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12781 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12782 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12786 emit_move_insn (reg
, new_rtx
);
12792 if (INTVAL (op1
) < -16*1024*1024
12793 || INTVAL (op1
) >= 16*1024*1024)
12795 if (!x86_64_immediate_operand (op1
, Pmode
))
12796 op1
= force_reg (Pmode
, op1
);
12797 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12803 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12804 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12805 base
== reg
? NULL_RTX
: reg
);
12807 if (CONST_INT_P (new_rtx
))
12808 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
12811 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12813 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12814 new_rtx
= XEXP (new_rtx
, 1);
12816 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
12824 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12827 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12829 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12831 if (GET_MODE (tp
) != tp_mode
)
12833 gcc_assert (GET_MODE (tp
) == SImode
);
12834 gcc_assert (tp_mode
== DImode
);
12836 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12840 tp
= copy_to_mode_reg (tp_mode
, tp
);
12845 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12847 static GTY(()) rtx ix86_tls_symbol
;
12850 ix86_tls_get_addr (void)
12852 if (!ix86_tls_symbol
)
12855 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12856 ? "___tls_get_addr" : "__tls_get_addr");
12858 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12861 return ix86_tls_symbol
;
12864 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12866 static GTY(()) rtx ix86_tls_module_base_symbol
;
12869 ix86_tls_module_base (void)
12871 if (!ix86_tls_module_base_symbol
)
12873 ix86_tls_module_base_symbol
12874 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12876 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12877 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12880 return ix86_tls_module_base_symbol
;
12883 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12884 false if we expect this to be used for a memory address and true if
12885 we expect to load the address into a register. */
12888 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12890 rtx dest
, base
, off
;
12891 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12892 enum machine_mode tp_mode
= Pmode
;
12897 case TLS_MODEL_GLOBAL_DYNAMIC
:
12898 dest
= gen_reg_rtx (Pmode
);
12903 pic
= pic_offset_table_rtx
;
12906 pic
= gen_reg_rtx (Pmode
);
12907 emit_insn (gen_set_got (pic
));
12911 if (TARGET_GNU2_TLS
)
12914 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12916 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12918 tp
= get_thread_pointer (Pmode
, true);
12919 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12921 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12925 rtx caddr
= ix86_tls_get_addr ();
12929 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12932 emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax
, x
,
12934 insns
= get_insns ();
12937 RTL_CONST_CALL_P (insns
) = 1;
12938 emit_libcall_block (insns
, dest
, rax
, x
);
12941 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12945 case TLS_MODEL_LOCAL_DYNAMIC
:
12946 base
= gen_reg_rtx (Pmode
);
12951 pic
= pic_offset_table_rtx
;
12954 pic
= gen_reg_rtx (Pmode
);
12955 emit_insn (gen_set_got (pic
));
12959 if (TARGET_GNU2_TLS
)
12961 rtx tmp
= ix86_tls_module_base ();
12964 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12966 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12968 tp
= get_thread_pointer (Pmode
, true);
12969 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12970 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12974 rtx caddr
= ix86_tls_get_addr ();
12978 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
12981 emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax
,
12983 insns
= get_insns ();
12986 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12987 share the LD_BASE result with other LD model accesses. */
12988 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12989 UNSPEC_TLS_LD_BASE
);
12991 RTL_CONST_CALL_P (insns
) = 1;
12992 emit_libcall_block (insns
, base
, rax
, eqv
);
12995 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12998 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12999 off
= gen_rtx_CONST (Pmode
, off
);
13001 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13003 if (TARGET_GNU2_TLS
)
13005 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13007 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13011 case TLS_MODEL_INITIAL_EXEC
:
13014 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13016 /* The Sun linker took the AMD64 TLS spec literally
13017 and can only handle %rax as destination of the
13018 initial executable code sequence. */
13020 dest
= gen_reg_rtx (DImode
);
13021 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13025 /* Generate DImode references to avoid %fs:(%reg32)
13026 problems and linker IE->LE relaxation bug. */
13029 type
= UNSPEC_GOTNTPOFF
;
13033 if (reload_in_progress
)
13034 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13035 pic
= pic_offset_table_rtx
;
13036 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13038 else if (!TARGET_ANY_GNU_TLS
)
13040 pic
= gen_reg_rtx (Pmode
);
13041 emit_insn (gen_set_got (pic
));
13042 type
= UNSPEC_GOTTPOFF
;
13047 type
= UNSPEC_INDNTPOFF
;
13050 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13051 off
= gen_rtx_CONST (tp_mode
, off
);
13053 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13054 off
= gen_const_mem (tp_mode
, off
);
13055 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13057 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13059 base
= get_thread_pointer (tp_mode
,
13060 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13061 off
= force_reg (tp_mode
, off
);
13062 return gen_rtx_PLUS (tp_mode
, base
, off
);
13066 base
= get_thread_pointer (Pmode
, true);
13067 dest
= gen_reg_rtx (Pmode
);
13068 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13072 case TLS_MODEL_LOCAL_EXEC
:
13073 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13074 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13075 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13076 off
= gen_rtx_CONST (Pmode
, off
);
13078 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13080 base
= get_thread_pointer (Pmode
,
13081 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13082 return gen_rtx_PLUS (Pmode
, base
, off
);
13086 base
= get_thread_pointer (Pmode
, true);
13087 dest
= gen_reg_rtx (Pmode
);
13088 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13093 gcc_unreachable ();
13099 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13102 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13103 htab_t dllimport_map
;
13106 get_dllimport_decl (tree decl
)
13108 struct tree_map
*h
, in
;
13111 const char *prefix
;
13112 size_t namelen
, prefixlen
;
13117 if (!dllimport_map
)
13118 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13120 in
.hash
= htab_hash_pointer (decl
);
13121 in
.base
.from
= decl
;
13122 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13123 h
= (struct tree_map
*) *loc
;
13127 *loc
= h
= ggc_alloc_tree_map ();
13129 h
->base
.from
= decl
;
13130 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13131 VAR_DECL
, NULL
, ptr_type_node
);
13132 DECL_ARTIFICIAL (to
) = 1;
13133 DECL_IGNORED_P (to
) = 1;
13134 DECL_EXTERNAL (to
) = 1;
13135 TREE_READONLY (to
) = 1;
13137 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13138 name
= targetm
.strip_name_encoding (name
);
13139 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13140 ? "*__imp_" : "*__imp__";
13141 namelen
= strlen (name
);
13142 prefixlen
= strlen (prefix
);
13143 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13144 memcpy (imp_name
, prefix
, prefixlen
);
13145 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13147 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13148 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13149 SET_SYMBOL_REF_DECL (rtl
, to
);
13150 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
13152 rtl
= gen_const_mem (Pmode
, rtl
);
13153 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13155 SET_DECL_RTL (to
, rtl
);
13156 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13161 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13162 true if we require the result be a register. */
13165 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13170 gcc_assert (SYMBOL_REF_DECL (symbol
));
13171 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13173 x
= DECL_RTL (imp_decl
);
13175 x
= force_reg (Pmode
, x
);
13179 /* Try machine-dependent ways of modifying an illegitimate address
13180 to be legitimate. If we find one, return the new, valid address.
13181 This macro is used in only one place: `memory_address' in explow.c.
13183 OLDX is the address as it was before break_out_memory_refs was called.
13184 In some cases it is useful to look at this to decide what needs to be done.
13186 It is always safe for this macro to do nothing. It exists to recognize
13187 opportunities to optimize the output.
13189 For the 80386, we handle X+REG by loading X into a register R and
13190 using R+REG. R will go in a general reg and indexing will be used.
13191 However, if REG is a broken-out memory address or multiplication,
13192 nothing needs to be done because REG can certainly go in a general reg.
13194 When -fpic is used, special handling is needed for symbolic references.
13195 See comments by legitimize_pic_address in i386.c for details. */
13198 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13199 enum machine_mode mode
)
13204 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13206 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13207 if (GET_CODE (x
) == CONST
13208 && GET_CODE (XEXP (x
, 0)) == PLUS
13209 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13210 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13212 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13213 (enum tls_model
) log
, false);
13214 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13217 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13219 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13220 return legitimize_dllimport_symbol (x
, true);
13221 if (GET_CODE (x
) == CONST
13222 && GET_CODE (XEXP (x
, 0)) == PLUS
13223 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13224 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13226 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13227 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13231 if (flag_pic
&& SYMBOLIC_CONST (x
))
13232 return legitimize_pic_address (x
, 0);
13235 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13236 return machopic_indirect_data_reference (x
, 0);
13239 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13240 if (GET_CODE (x
) == ASHIFT
13241 && CONST_INT_P (XEXP (x
, 1))
13242 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13245 log
= INTVAL (XEXP (x
, 1));
13246 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13247 GEN_INT (1 << log
));
13250 if (GET_CODE (x
) == PLUS
)
13252 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13254 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13255 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13256 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13259 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13260 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13261 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13262 GEN_INT (1 << log
));
13265 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13266 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13267 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13270 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13271 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13272 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13273 GEN_INT (1 << log
));
13276 /* Put multiply first if it isn't already. */
13277 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13279 rtx tmp
= XEXP (x
, 0);
13280 XEXP (x
, 0) = XEXP (x
, 1);
13285 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13286 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13287 created by virtual register instantiation, register elimination, and
13288 similar optimizations. */
13289 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13292 x
= gen_rtx_PLUS (Pmode
,
13293 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13294 XEXP (XEXP (x
, 1), 0)),
13295 XEXP (XEXP (x
, 1), 1));
13299 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13300 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13301 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13302 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13303 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13304 && CONSTANT_P (XEXP (x
, 1)))
13307 rtx other
= NULL_RTX
;
13309 if (CONST_INT_P (XEXP (x
, 1)))
13311 constant
= XEXP (x
, 1);
13312 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13314 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13316 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13317 other
= XEXP (x
, 1);
13325 x
= gen_rtx_PLUS (Pmode
,
13326 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13327 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13328 plus_constant (Pmode
, other
,
13329 INTVAL (constant
)));
13333 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13336 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13339 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13342 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13345 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13349 && REG_P (XEXP (x
, 1))
13350 && REG_P (XEXP (x
, 0)))
13353 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13356 x
= legitimize_pic_address (x
, 0);
13359 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13362 if (REG_P (XEXP (x
, 0)))
13364 rtx temp
= gen_reg_rtx (Pmode
);
13365 rtx val
= force_operand (XEXP (x
, 1), temp
);
13368 if (GET_MODE (val
) != Pmode
)
13369 val
= convert_to_mode (Pmode
, val
, 1);
13370 emit_move_insn (temp
, val
);
13373 XEXP (x
, 1) = temp
;
13377 else if (REG_P (XEXP (x
, 1)))
13379 rtx temp
= gen_reg_rtx (Pmode
);
13380 rtx val
= force_operand (XEXP (x
, 0), temp
);
13383 if (GET_MODE (val
) != Pmode
)
13384 val
= convert_to_mode (Pmode
, val
, 1);
13385 emit_move_insn (temp
, val
);
13388 XEXP (x
, 0) = temp
;
13396 /* Print an integer constant expression in assembler syntax. Addition
13397 and subtraction are the only arithmetic that may appear in these
13398 expressions. FILE is the stdio stream to write to, X is the rtx, and
13399 CODE is the operand print code from the output string. */
13402 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13406 switch (GET_CODE (x
))
13409 gcc_assert (flag_pic
);
13414 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13415 output_addr_const (file
, x
);
13418 const char *name
= XSTR (x
, 0);
13420 /* Mark the decl as referenced so that cgraph will
13421 output the function. */
13422 if (SYMBOL_REF_DECL (x
))
13423 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13426 if (MACHOPIC_INDIRECT
13427 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13428 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13430 assemble_name (file
, name
);
13432 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13433 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13434 fputs ("@PLT", file
);
13441 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13442 assemble_name (asm_out_file
, buf
);
13446 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13450 /* This used to output parentheses around the expression,
13451 but that does not work on the 386 (either ATT or BSD assembler). */
13452 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13456 if (GET_MODE (x
) == VOIDmode
)
13458 /* We can use %d if the number is <32 bits and positive. */
13459 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13460 fprintf (file
, "0x%lx%08lx",
13461 (unsigned long) CONST_DOUBLE_HIGH (x
),
13462 (unsigned long) CONST_DOUBLE_LOW (x
));
13464 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13467 /* We can't handle floating point constants;
13468 TARGET_PRINT_OPERAND must handle them. */
13469 output_operand_lossage ("floating constant misused");
13473 /* Some assemblers need integer constants to appear first. */
13474 if (CONST_INT_P (XEXP (x
, 0)))
13476 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13478 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13482 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13483 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13485 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13491 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13492 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13494 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13496 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13500 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13502 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13507 gcc_assert (XVECLEN (x
, 0) == 1);
13508 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13509 switch (XINT (x
, 1))
13512 fputs ("@GOT", file
);
13514 case UNSPEC_GOTOFF
:
13515 fputs ("@GOTOFF", file
);
13517 case UNSPEC_PLTOFF
:
13518 fputs ("@PLTOFF", file
);
13521 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13522 "(%rip)" : "[rip]", file
);
13524 case UNSPEC_GOTPCREL
:
13525 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13526 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13528 case UNSPEC_GOTTPOFF
:
13529 /* FIXME: This might be @TPOFF in Sun ld too. */
13530 fputs ("@gottpoff", file
);
13533 fputs ("@tpoff", file
);
13535 case UNSPEC_NTPOFF
:
13537 fputs ("@tpoff", file
);
13539 fputs ("@ntpoff", file
);
13541 case UNSPEC_DTPOFF
:
13542 fputs ("@dtpoff", file
);
13544 case UNSPEC_GOTNTPOFF
:
13546 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13547 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13549 fputs ("@gotntpoff", file
);
13551 case UNSPEC_INDNTPOFF
:
13552 fputs ("@indntpoff", file
);
13555 case UNSPEC_MACHOPIC_OFFSET
:
13557 machopic_output_function_base_name (file
);
13561 output_operand_lossage ("invalid UNSPEC as operand");
13567 output_operand_lossage ("invalid expression as operand");
13571 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13572 We need to emit DTP-relative relocations. */
13574 static void ATTRIBUTE_UNUSED
13575 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13577 fputs (ASM_LONG
, file
);
13578 output_addr_const (file
, x
);
13579 fputs ("@dtpoff", file
);
13585 fputs (", 0", file
);
13588 gcc_unreachable ();
13592 /* Return true if X is a representation of the PIC register. This copes
13593 with calls from ix86_find_base_term, where the register might have
13594 been replaced by a cselib value. */
13597 ix86_pic_register_p (rtx x
)
13599 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13600 return (pic_offset_table_rtx
13601 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13603 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13606 /* Helper function for ix86_delegitimize_address.
13607 Attempt to delegitimize TLS local-exec accesses. */
13610 ix86_delegitimize_tls_address (rtx orig_x
)
13612 rtx x
= orig_x
, unspec
;
13613 struct ix86_address addr
;
13615 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13619 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13621 if (ix86_decompose_address (x
, &addr
) == 0
13622 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13623 || addr
.disp
== NULL_RTX
13624 || GET_CODE (addr
.disp
) != CONST
)
13626 unspec
= XEXP (addr
.disp
, 0);
13627 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13628 unspec
= XEXP (unspec
, 0);
13629 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13631 x
= XVECEXP (unspec
, 0, 0);
13632 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13633 if (unspec
!= XEXP (addr
.disp
, 0))
13634 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13637 rtx idx
= addr
.index
;
13638 if (addr
.scale
!= 1)
13639 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13640 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13643 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13644 if (MEM_P (orig_x
))
13645 x
= replace_equiv_address_nv (orig_x
, x
);
13649 /* In the name of slightly smaller debug output, and to cater to
13650 general assembler lossage, recognize PIC+GOTOFF and turn it back
13651 into a direct symbol reference.
13653 On Darwin, this is necessary to avoid a crash, because Darwin
13654 has a different PIC label for each routine but the DWARF debugging
13655 information is not associated with any particular routine, so it's
13656 necessary to remove references to the PIC label from RTL stored by
13657 the DWARF output code. */
13660 ix86_delegitimize_address (rtx x
)
13662 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13663 /* addend is NULL or some rtx if x is something+GOTOFF where
13664 something doesn't include the PIC register. */
13665 rtx addend
= NULL_RTX
;
13666 /* reg_addend is NULL or a multiple of some register. */
13667 rtx reg_addend
= NULL_RTX
;
13668 /* const_addend is NULL or a const_int. */
13669 rtx const_addend
= NULL_RTX
;
13670 /* This is the result, or NULL. */
13671 rtx result
= NULL_RTX
;
13680 if (GET_CODE (x
) == CONST
13681 && GET_CODE (XEXP (x
, 0)) == PLUS
13682 && GET_MODE (XEXP (x
, 0)) == Pmode
13683 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13684 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13685 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13687 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13688 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13689 if (MEM_P (orig_x
))
13690 x
= replace_equiv_address_nv (orig_x
, x
);
13693 if (GET_CODE (x
) != CONST
13694 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13695 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13696 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13697 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13698 return ix86_delegitimize_tls_address (orig_x
);
13699 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13700 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13702 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13710 if (GET_CODE (x
) != PLUS
13711 || GET_CODE (XEXP (x
, 1)) != CONST
)
13712 return ix86_delegitimize_tls_address (orig_x
);
13714 if (ix86_pic_register_p (XEXP (x
, 0)))
13715 /* %ebx + GOT/GOTOFF */
13717 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13719 /* %ebx + %reg * scale + GOT/GOTOFF */
13720 reg_addend
= XEXP (x
, 0);
13721 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13722 reg_addend
= XEXP (reg_addend
, 1);
13723 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13724 reg_addend
= XEXP (reg_addend
, 0);
13727 reg_addend
= NULL_RTX
;
13728 addend
= XEXP (x
, 0);
13732 addend
= XEXP (x
, 0);
13734 x
= XEXP (XEXP (x
, 1), 0);
13735 if (GET_CODE (x
) == PLUS
13736 && CONST_INT_P (XEXP (x
, 1)))
13738 const_addend
= XEXP (x
, 1);
13742 if (GET_CODE (x
) == UNSPEC
13743 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13744 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13745 result
= XVECEXP (x
, 0, 0);
13747 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13748 && !MEM_P (orig_x
))
13749 result
= XVECEXP (x
, 0, 0);
13752 return ix86_delegitimize_tls_address (orig_x
);
13755 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13757 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13760 /* If the rest of original X doesn't involve the PIC register, add
13761 addend and subtract pic_offset_table_rtx. This can happen e.g.
13763 leal (%ebx, %ecx, 4), %ecx
13765 movl foo@GOTOFF(%ecx), %edx
13766 in which case we return (%ecx - %ebx) + foo. */
13767 if (pic_offset_table_rtx
)
13768 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13769 pic_offset_table_rtx
),
13774 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13776 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13777 if (result
== NULL_RTX
)
13783 /* If X is a machine specific address (i.e. a symbol or label being
13784 referenced as a displacement from the GOT implemented using an
13785 UNSPEC), then return the base term. Otherwise return X. */
13788 ix86_find_base_term (rtx x
)
13794 if (GET_CODE (x
) != CONST
)
13796 term
= XEXP (x
, 0);
13797 if (GET_CODE (term
) == PLUS
13798 && (CONST_INT_P (XEXP (term
, 1))
13799 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13800 term
= XEXP (term
, 0);
13801 if (GET_CODE (term
) != UNSPEC
13802 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13803 && XINT (term
, 1) != UNSPEC_PCREL
))
13806 return XVECEXP (term
, 0, 0);
13809 return ix86_delegitimize_address (x
);
13813 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13814 bool fp
, FILE *file
)
13816 const char *suffix
;
13818 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13820 code
= ix86_fp_compare_code_to_integer (code
);
13824 code
= reverse_condition (code
);
13875 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13879 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13880 Those same assemblers have the same but opposite lossage on cmov. */
13881 if (mode
== CCmode
)
13882 suffix
= fp
? "nbe" : "a";
13883 else if (mode
== CCCmode
)
13886 gcc_unreachable ();
13902 gcc_unreachable ();
13906 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13923 gcc_unreachable ();
13927 /* ??? As above. */
13928 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13929 suffix
= fp
? "nb" : "ae";
13932 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13936 /* ??? As above. */
13937 if (mode
== CCmode
)
13939 else if (mode
== CCCmode
)
13940 suffix
= fp
? "nb" : "ae";
13942 gcc_unreachable ();
13945 suffix
= fp
? "u" : "p";
13948 suffix
= fp
? "nu" : "np";
13951 gcc_unreachable ();
13953 fputs (suffix
, file
);
13956 /* Print the name of register X to FILE based on its machine mode and number.
13957 If CODE is 'w', pretend the mode is HImode.
13958 If CODE is 'b', pretend the mode is QImode.
13959 If CODE is 'k', pretend the mode is SImode.
13960 If CODE is 'q', pretend the mode is DImode.
13961 If CODE is 'x', pretend the mode is V4SFmode.
13962 If CODE is 't', pretend the mode is V8SFmode.
13963 If CODE is 'h', pretend the reg is the 'high' byte register.
13964 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13965 If CODE is 'd', duplicate the operand for AVX instruction.
13969 print_reg (rtx x
, int code
, FILE *file
)
13972 bool duplicated
= code
== 'd' && TARGET_AVX
;
13974 gcc_assert (x
== pc_rtx
13975 || (REGNO (x
) != ARG_POINTER_REGNUM
13976 && REGNO (x
) != FRAME_POINTER_REGNUM
13977 && REGNO (x
) != FLAGS_REG
13978 && REGNO (x
) != FPSR_REG
13979 && REGNO (x
) != FPCR_REG
));
13981 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13986 gcc_assert (TARGET_64BIT
);
13987 fputs ("rip", file
);
13991 if (code
== 'w' || MMX_REG_P (x
))
13993 else if (code
== 'b')
13995 else if (code
== 'k')
13997 else if (code
== 'q')
13999 else if (code
== 'y')
14001 else if (code
== 'h')
14003 else if (code
== 'x')
14005 else if (code
== 't')
14008 code
= GET_MODE_SIZE (GET_MODE (x
));
14010 /* Irritatingly, AMD extended registers use different naming convention
14011 from the normal registers: "r%d[bwd]" */
14012 if (REX_INT_REG_P (x
))
14014 gcc_assert (TARGET_64BIT
);
14016 fprint_ul (file
, REGNO (x
) - FIRST_REX_INT_REG
+ 8);
14020 error ("extended registers have no high halves");
14035 error ("unsupported operand size for extended register");
14045 if (STACK_TOP_P (x
))
14054 if (! ANY_FP_REG_P (x
))
14055 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14060 reg
= hi_reg_name
[REGNO (x
)];
14063 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
14065 reg
= qi_reg_name
[REGNO (x
)];
14068 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
14070 reg
= qi_high_reg_name
[REGNO (x
)];
14075 gcc_assert (!duplicated
);
14077 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14082 gcc_unreachable ();
14088 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14089 fprintf (file
, ", %%%s", reg
);
14091 fprintf (file
, ", %s", reg
);
14095 /* Locate some local-dynamic symbol still in use by this function
14096 so that we can print its name in some tls_local_dynamic_base
14100 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14104 if (GET_CODE (x
) == SYMBOL_REF
14105 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14107 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14114 static const char *
14115 get_some_local_dynamic_name (void)
14119 if (cfun
->machine
->some_ld_name
)
14120 return cfun
->machine
->some_ld_name
;
14122 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14123 if (NONDEBUG_INSN_P (insn
)
14124 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14125 return cfun
->machine
->some_ld_name
;
14130 /* Meaning of CODE:
14131 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14132 C -- print opcode suffix for set/cmov insn.
14133 c -- like C, but print reversed condition
14134 F,f -- likewise, but for floating-point.
14135 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14137 R -- print the prefix for register names.
14138 z -- print the opcode suffix for the size of the current operand.
14139 Z -- likewise, with special suffixes for x87 instructions.
14140 * -- print a star (in certain assembler syntax)
14141 A -- print an absolute memory reference.
14142 E -- print address with DImode register names if TARGET_64BIT.
14143 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14144 s -- print a shift double count, followed by the assemblers argument
14146 b -- print the QImode name of the register for the indicated operand.
14147 %b0 would print %al if operands[0] is reg 0.
14148 w -- likewise, print the HImode name of the register.
14149 k -- likewise, print the SImode name of the register.
14150 q -- likewise, print the DImode name of the register.
14151 x -- likewise, print the V4SFmode name of the register.
14152 t -- likewise, print the V8SFmode name of the register.
14153 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14154 y -- print "st(0)" instead of "st" as a register.
14155 d -- print duplicated register operand for AVX instruction.
14156 D -- print condition for SSE cmp instruction.
14157 P -- if PIC, print an @PLT suffix.
14158 p -- print raw symbol name.
14159 X -- don't print any sort of PIC '@' suffix for a symbol.
14160 & -- print some in-use local-dynamic symbol name.
14161 H -- print a memory address offset by 8; used for sse high-parts
14162 Y -- print condition for XOP pcom* instruction.
14163 + -- print a branch hint as 'cs' or 'ds' prefix
14164 ; -- print a semicolon (after prefixes due to bug in older gas).
14165 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14166 @ -- print a segment register of thread base pointer load
14167 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14171 ix86_print_operand (FILE *file
, rtx x
, int code
)
14178 switch (ASSEMBLER_DIALECT
)
14185 /* Intel syntax. For absolute addresses, registers should not
14186 be surrounded by braces. */
14190 ix86_print_operand (file
, x
, 0);
14197 gcc_unreachable ();
14200 ix86_print_operand (file
, x
, 0);
14204 /* Wrap address in an UNSPEC to declare special handling. */
14206 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14208 output_address (x
);
14212 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14217 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14222 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14227 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14232 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14237 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14242 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14243 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14246 switch (GET_MODE_SIZE (GET_MODE (x
)))
14261 output_operand_lossage
14262 ("invalid operand size for operand code 'O'");
14271 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14273 /* Opcodes don't get size suffixes if using Intel opcodes. */
14274 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14277 switch (GET_MODE_SIZE (GET_MODE (x
)))
14296 output_operand_lossage
14297 ("invalid operand size for operand code 'z'");
14302 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14304 (0, "non-integer operand used with operand code 'z'");
14308 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14309 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14312 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14314 switch (GET_MODE_SIZE (GET_MODE (x
)))
14317 #ifdef HAVE_AS_IX86_FILDS
14327 #ifdef HAVE_AS_IX86_FILDQ
14330 fputs ("ll", file
);
14338 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14340 /* 387 opcodes don't get size suffixes
14341 if the operands are registers. */
14342 if (STACK_REG_P (x
))
14345 switch (GET_MODE_SIZE (GET_MODE (x
)))
14366 output_operand_lossage
14367 ("invalid operand type used with operand code 'Z'");
14371 output_operand_lossage
14372 ("invalid operand size for operand code 'Z'");
14390 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14392 ix86_print_operand (file
, x
, 0);
14393 fputs (", ", file
);
14398 switch (GET_CODE (x
))
14401 fputs ("neq", file
);
14404 fputs ("eq", file
);
14408 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14412 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14416 fputs ("le", file
);
14420 fputs ("lt", file
);
14423 fputs ("unord", file
);
14426 fputs ("ord", file
);
14429 fputs ("ueq", file
);
14432 fputs ("nlt", file
);
14435 fputs ("nle", file
);
14438 fputs ("ule", file
);
14441 fputs ("ult", file
);
14444 fputs ("une", file
);
14447 output_operand_lossage ("operand is not a condition code, "
14448 "invalid operand code 'Y'");
14454 /* Little bit of braindamage here. The SSE compare instructions
14455 does use completely different names for the comparisons that the
14456 fp conditional moves. */
14457 switch (GET_CODE (x
))
14462 fputs ("eq_us", file
);
14466 fputs ("eq", file
);
14471 fputs ("nge", file
);
14475 fputs ("lt", file
);
14480 fputs ("ngt", file
);
14484 fputs ("le", file
);
14487 fputs ("unord", file
);
14492 fputs ("neq_oq", file
);
14496 fputs ("neq", file
);
14501 fputs ("ge", file
);
14505 fputs ("nlt", file
);
14510 fputs ("gt", file
);
14514 fputs ("nle", file
);
14517 fputs ("ord", file
);
14520 output_operand_lossage ("operand is not a condition code, "
14521 "invalid operand code 'D'");
14528 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14529 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14535 if (!COMPARISON_P (x
))
14537 output_operand_lossage ("operand is not a condition code, "
14538 "invalid operand code '%c'", code
);
14541 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14542 code
== 'c' || code
== 'f',
14543 code
== 'F' || code
== 'f',
14548 if (!offsettable_memref_p (x
))
14550 output_operand_lossage ("operand is not an offsettable memory "
14551 "reference, invalid operand code 'H'");
14554 /* It doesn't actually matter what mode we use here, as we're
14555 only going to use this for printing. */
14556 x
= adjust_address_nv (x
, DImode
, 8);
14560 gcc_assert (CONST_INT_P (x
));
14562 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14563 #ifdef HAVE_AS_IX86_HLE
14564 fputs ("xacquire ", file
);
14566 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14568 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14569 #ifdef HAVE_AS_IX86_HLE
14570 fputs ("xrelease ", file
);
14572 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14574 /* We do not want to print value of the operand. */
14578 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14584 const char *name
= get_some_local_dynamic_name ();
14586 output_operand_lossage ("'%%&' used without any "
14587 "local dynamic TLS references");
14589 assemble_name (file
, name
);
14598 || optimize_function_for_size_p (cfun
)
14599 || !TARGET_BRANCH_PREDICTION_HINTS
)
14602 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14605 int pred_val
= INTVAL (XEXP (x
, 0));
14607 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14608 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14610 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14612 = final_forward_branch_p (current_output_insn
) == 0;
14614 /* Emit hints only in the case default branch prediction
14615 heuristics would fail. */
14616 if (taken
!= cputaken
)
14618 /* We use 3e (DS) prefix for taken branches and
14619 2e (CS) prefix for not taken branches. */
14621 fputs ("ds ; ", file
);
14623 fputs ("cs ; ", file
);
14631 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14637 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14640 /* The kernel uses a different segment register for performance
14641 reasons; a system call would not have to trash the userspace
14642 segment register, which would be expensive. */
14643 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14644 fputs ("fs", file
);
14646 fputs ("gs", file
);
14650 putc (TARGET_AVX2
? 'i' : 'f', file
);
14654 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14655 fputs ("addr32 ", file
);
14659 output_operand_lossage ("invalid operand code '%c'", code
);
14664 print_reg (x
, code
, file
);
14666 else if (MEM_P (x
))
14668 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14669 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14670 && GET_MODE (x
) != BLKmode
)
14673 switch (GET_MODE_SIZE (GET_MODE (x
)))
14675 case 1: size
= "BYTE"; break;
14676 case 2: size
= "WORD"; break;
14677 case 4: size
= "DWORD"; break;
14678 case 8: size
= "QWORD"; break;
14679 case 12: size
= "TBYTE"; break;
14681 if (GET_MODE (x
) == XFmode
)
14686 case 32: size
= "YMMWORD"; break;
14688 gcc_unreachable ();
14691 /* Check for explicit size override (codes 'b', 'w', 'k',
14695 else if (code
== 'w')
14697 else if (code
== 'k')
14699 else if (code
== 'q')
14701 else if (code
== 'x')
14704 fputs (size
, file
);
14705 fputs (" PTR ", file
);
14709 /* Avoid (%rip) for call operands. */
14710 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14711 && !CONST_INT_P (x
))
14712 output_addr_const (file
, x
);
14713 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14714 output_operand_lossage ("invalid constraints for operand");
14716 output_address (x
);
14719 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14724 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14725 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14727 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14729 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14731 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14733 fprintf (file
, "0x%08x", (unsigned int) l
);
14736 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14741 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14742 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14744 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14746 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14749 /* These float cases don't actually occur as immediate operands. */
14750 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14754 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14755 fputs (dstr
, file
);
14760 /* We have patterns that allow zero sets of memory, for instance.
14761 In 64-bit mode, we should probably support all 8-byte vectors,
14762 since we can in fact encode that into an immediate. */
14763 if (GET_CODE (x
) == CONST_VECTOR
)
14765 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14769 if (code
!= 'P' && code
!= 'p')
14771 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14773 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14776 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14777 || GET_CODE (x
) == LABEL_REF
)
14779 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14782 fputs ("OFFSET FLAT:", file
);
14785 if (CONST_INT_P (x
))
14786 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14787 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14788 output_pic_addr_const (file
, x
, code
);
14790 output_addr_const (file
, x
);
14795 ix86_print_operand_punct_valid_p (unsigned char code
)
14797 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14798 || code
== ';' || code
== '~' || code
== '^');
14801 /* Print a memory operand whose address is ADDR. */
14804 ix86_print_operand_address (FILE *file
, rtx addr
)
14806 struct ix86_address parts
;
14807 rtx base
, index
, disp
;
14813 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14815 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14816 gcc_assert (parts
.index
== NULL_RTX
);
14817 parts
.index
= XVECEXP (addr
, 0, 1);
14818 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14819 addr
= XVECEXP (addr
, 0, 0);
14822 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14824 gcc_assert (TARGET_64BIT
);
14825 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14829 ok
= ix86_decompose_address (addr
, &parts
);
14833 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14835 rtx tmp
= SUBREG_REG (parts
.base
);
14836 parts
.base
= simplify_subreg (GET_MODE (parts
.base
),
14837 tmp
, GET_MODE (tmp
), 0);
14838 gcc_assert (parts
.base
!= NULL_RTX
);
14841 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14843 rtx tmp
= SUBREG_REG (parts
.index
);
14844 parts
.index
= simplify_subreg (GET_MODE (parts
.index
),
14845 tmp
, GET_MODE (tmp
), 0);
14846 gcc_assert (parts
.index
!= NULL_RTX
);
14850 index
= parts
.index
;
14852 scale
= parts
.scale
;
14860 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14862 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14865 gcc_unreachable ();
14868 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14869 if (TARGET_64BIT
&& !base
&& !index
)
14873 if (GET_CODE (disp
) == CONST
14874 && GET_CODE (XEXP (disp
, 0)) == PLUS
14875 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14876 symbol
= XEXP (XEXP (disp
, 0), 0);
14878 if (GET_CODE (symbol
) == LABEL_REF
14879 || (GET_CODE (symbol
) == SYMBOL_REF
14880 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14883 if (!base
&& !index
)
14885 /* Displacement only requires special attention. */
14887 if (CONST_INT_P (disp
))
14889 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14890 fputs ("ds:", file
);
14891 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14894 output_pic_addr_const (file
, disp
, 0);
14896 output_addr_const (file
, disp
);
14900 /* Print SImode register names to force addr32 prefix. */
14901 if (GET_CODE (addr
) == SUBREG
)
14903 gcc_assert (TARGET_64BIT
);
14904 gcc_assert (GET_MODE (addr
) == SImode
);
14905 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14906 gcc_assert (!code
);
14909 else if (GET_CODE (addr
) == ZERO_EXTEND
14910 || GET_CODE (addr
) == AND
)
14912 gcc_assert (TARGET_64BIT
);
14913 gcc_assert (GET_MODE (addr
) == DImode
);
14914 gcc_assert (!code
);
14918 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14923 output_pic_addr_const (file
, disp
, 0);
14924 else if (GET_CODE (disp
) == LABEL_REF
)
14925 output_asm_label (disp
);
14927 output_addr_const (file
, disp
);
14932 print_reg (base
, code
, file
);
14936 print_reg (index
, vsib
? 0 : code
, file
);
14937 if (scale
!= 1 || vsib
)
14938 fprintf (file
, ",%d", scale
);
14944 rtx offset
= NULL_RTX
;
14948 /* Pull out the offset of a symbol; print any symbol itself. */
14949 if (GET_CODE (disp
) == CONST
14950 && GET_CODE (XEXP (disp
, 0)) == PLUS
14951 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14953 offset
= XEXP (XEXP (disp
, 0), 1);
14954 disp
= gen_rtx_CONST (VOIDmode
,
14955 XEXP (XEXP (disp
, 0), 0));
14959 output_pic_addr_const (file
, disp
, 0);
14960 else if (GET_CODE (disp
) == LABEL_REF
)
14961 output_asm_label (disp
);
14962 else if (CONST_INT_P (disp
))
14965 output_addr_const (file
, disp
);
14971 print_reg (base
, code
, file
);
14974 if (INTVAL (offset
) >= 0)
14976 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14980 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14987 print_reg (index
, vsib
? 0 : code
, file
);
14988 if (scale
!= 1 || vsib
)
14989 fprintf (file
, "*%d", scale
);
14996 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14999 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15003 if (GET_CODE (x
) != UNSPEC
)
15006 op
= XVECEXP (x
, 0, 0);
15007 switch (XINT (x
, 1))
15009 case UNSPEC_GOTTPOFF
:
15010 output_addr_const (file
, op
);
15011 /* FIXME: This might be @TPOFF in Sun ld. */
15012 fputs ("@gottpoff", file
);
15015 output_addr_const (file
, op
);
15016 fputs ("@tpoff", file
);
15018 case UNSPEC_NTPOFF
:
15019 output_addr_const (file
, op
);
15021 fputs ("@tpoff", file
);
15023 fputs ("@ntpoff", file
);
15025 case UNSPEC_DTPOFF
:
15026 output_addr_const (file
, op
);
15027 fputs ("@dtpoff", file
);
15029 case UNSPEC_GOTNTPOFF
:
15030 output_addr_const (file
, op
);
15032 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15033 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15035 fputs ("@gotntpoff", file
);
15037 case UNSPEC_INDNTPOFF
:
15038 output_addr_const (file
, op
);
15039 fputs ("@indntpoff", file
);
15042 case UNSPEC_MACHOPIC_OFFSET
:
15043 output_addr_const (file
, op
);
15045 machopic_output_function_base_name (file
);
15049 case UNSPEC_STACK_CHECK
:
15053 gcc_assert (flag_split_stack
);
15055 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15056 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15058 gcc_unreachable ();
15061 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15072 /* Split one or more double-mode RTL references into pairs of half-mode
15073 references. The RTL can be REG, offsettable MEM, integer constant, or
15074 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15075 split and "num" is its length. lo_half and hi_half are output arrays
15076 that parallel "operands". */
15079 split_double_mode (enum machine_mode mode
, rtx operands
[],
15080 int num
, rtx lo_half
[], rtx hi_half
[])
15082 enum machine_mode half_mode
;
15088 half_mode
= DImode
;
15091 half_mode
= SImode
;
15094 gcc_unreachable ();
15097 byte
= GET_MODE_SIZE (half_mode
);
15101 rtx op
= operands
[num
];
15103 /* simplify_subreg refuse to split volatile memory addresses,
15104 but we still have to handle it. */
15107 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15108 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15112 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15113 GET_MODE (op
) == VOIDmode
15114 ? mode
: GET_MODE (op
), 0);
15115 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15116 GET_MODE (op
) == VOIDmode
15117 ? mode
: GET_MODE (op
), byte
);
15122 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15123 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15124 is the expression of the binary operation. The output may either be
15125 emitted here, or returned to the caller, like all output_* functions.
15127 There is no guarantee that the operands are the same mode, as they
15128 might be within FLOAT or FLOAT_EXTEND expressions. */
15130 #ifndef SYSV386_COMPAT
15131 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15132 wants to fix the assemblers because that causes incompatibility
15133 with gcc. No-one wants to fix gcc because that causes
15134 incompatibility with assemblers... You can use the option of
15135 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15136 #define SYSV386_COMPAT 1
15140 output_387_binary_op (rtx insn
, rtx
*operands
)
15142 static char buf
[40];
15145 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15147 #ifdef ENABLE_CHECKING
15148 /* Even if we do not want to check the inputs, this documents input
15149 constraints. Which helps in understanding the following code. */
15150 if (STACK_REG_P (operands
[0])
15151 && ((REG_P (operands
[1])
15152 && REGNO (operands
[0]) == REGNO (operands
[1])
15153 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15154 || (REG_P (operands
[2])
15155 && REGNO (operands
[0]) == REGNO (operands
[2])
15156 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15157 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15160 gcc_assert (is_sse
);
15163 switch (GET_CODE (operands
[3]))
15166 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15167 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15175 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15176 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15184 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15185 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15193 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15194 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15202 gcc_unreachable ();
15209 strcpy (buf
, ssep
);
15210 if (GET_MODE (operands
[0]) == SFmode
)
15211 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15213 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15217 strcpy (buf
, ssep
+ 1);
15218 if (GET_MODE (operands
[0]) == SFmode
)
15219 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15221 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15227 switch (GET_CODE (operands
[3]))
15231 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15233 rtx temp
= operands
[2];
15234 operands
[2] = operands
[1];
15235 operands
[1] = temp
;
15238 /* know operands[0] == operands[1]. */
15240 if (MEM_P (operands
[2]))
15246 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15248 if (STACK_TOP_P (operands
[0]))
15249 /* How is it that we are storing to a dead operand[2]?
15250 Well, presumably operands[1] is dead too. We can't
15251 store the result to st(0) as st(0) gets popped on this
15252 instruction. Instead store to operands[2] (which I
15253 think has to be st(1)). st(1) will be popped later.
15254 gcc <= 2.8.1 didn't have this check and generated
15255 assembly code that the Unixware assembler rejected. */
15256 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15258 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15262 if (STACK_TOP_P (operands
[0]))
15263 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15265 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15270 if (MEM_P (operands
[1]))
15276 if (MEM_P (operands
[2]))
15282 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15285 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15286 derived assemblers, confusingly reverse the direction of
15287 the operation for fsub{r} and fdiv{r} when the
15288 destination register is not st(0). The Intel assembler
15289 doesn't have this brain damage. Read !SYSV386_COMPAT to
15290 figure out what the hardware really does. */
15291 if (STACK_TOP_P (operands
[0]))
15292 p
= "{p\t%0, %2|rp\t%2, %0}";
15294 p
= "{rp\t%2, %0|p\t%0, %2}";
15296 if (STACK_TOP_P (operands
[0]))
15297 /* As above for fmul/fadd, we can't store to st(0). */
15298 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15300 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15305 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15308 if (STACK_TOP_P (operands
[0]))
15309 p
= "{rp\t%0, %1|p\t%1, %0}";
15311 p
= "{p\t%1, %0|rp\t%0, %1}";
15313 if (STACK_TOP_P (operands
[0]))
15314 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15316 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15321 if (STACK_TOP_P (operands
[0]))
15323 if (STACK_TOP_P (operands
[1]))
15324 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15326 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15329 else if (STACK_TOP_P (operands
[1]))
15332 p
= "{\t%1, %0|r\t%0, %1}";
15334 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15340 p
= "{r\t%2, %0|\t%0, %2}";
15342 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15348 gcc_unreachable ();
15355 /* Return needed mode for entity in optimize_mode_switching pass. */
15358 ix86_mode_needed (int entity
, rtx insn
)
15360 enum attr_i387_cw mode
;
15362 /* The mode UNINITIALIZED is used to store control word after a
15363 function call or ASM pattern. The mode ANY specify that function
15364 has no requirements on the control word and make no changes in the
15365 bits we are interested in. */
15368 || (NONJUMP_INSN_P (insn
)
15369 && (asm_noperands (PATTERN (insn
)) >= 0
15370 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15371 return I387_CW_UNINITIALIZED
;
15373 if (recog_memoized (insn
) < 0)
15374 return I387_CW_ANY
;
15376 mode
= get_attr_i387_cw (insn
);
15381 if (mode
== I387_CW_TRUNC
)
15386 if (mode
== I387_CW_FLOOR
)
15391 if (mode
== I387_CW_CEIL
)
15396 if (mode
== I387_CW_MASK_PM
)
15401 gcc_unreachable ();
15404 return I387_CW_ANY
;
15407 /* Output code to initialize control word copies used by trunc?f?i and
15408 rounding patterns. CURRENT_MODE is set to current control word,
15409 while NEW_MODE is set to new control word. */
15412 emit_i387_cw_initialization (int mode
)
15414 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15417 enum ix86_stack_slot slot
;
15419 rtx reg
= gen_reg_rtx (HImode
);
15421 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15422 emit_move_insn (reg
, copy_rtx (stored_mode
));
15424 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15425 || optimize_function_for_size_p (cfun
))
15429 case I387_CW_TRUNC
:
15430 /* round toward zero (truncate) */
15431 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15432 slot
= SLOT_CW_TRUNC
;
15435 case I387_CW_FLOOR
:
15436 /* round down toward -oo */
15437 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15438 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15439 slot
= SLOT_CW_FLOOR
;
15443 /* round up toward +oo */
15444 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15445 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15446 slot
= SLOT_CW_CEIL
;
15449 case I387_CW_MASK_PM
:
15450 /* mask precision exception for nearbyint() */
15451 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15452 slot
= SLOT_CW_MASK_PM
;
15456 gcc_unreachable ();
15463 case I387_CW_TRUNC
:
15464 /* round toward zero (truncate) */
15465 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15466 slot
= SLOT_CW_TRUNC
;
15469 case I387_CW_FLOOR
:
15470 /* round down toward -oo */
15471 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15472 slot
= SLOT_CW_FLOOR
;
15476 /* round up toward +oo */
15477 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15478 slot
= SLOT_CW_CEIL
;
15481 case I387_CW_MASK_PM
:
15482 /* mask precision exception for nearbyint() */
15483 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15484 slot
= SLOT_CW_MASK_PM
;
15488 gcc_unreachable ();
15492 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15494 new_mode
= assign_386_stack_local (HImode
, slot
);
15495 emit_move_insn (new_mode
, reg
);
15498 /* Output code for INSN to convert a float to a signed int. OPERANDS
15499 are the insn operands. The output may be [HSD]Imode and the input
15500 operand may be [SDX]Fmode. */
15503 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15505 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15506 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15507 int round_mode
= get_attr_i387_cw (insn
);
15509 /* Jump through a hoop or two for DImode, since the hardware has no
15510 non-popping instruction. We used to do this a different way, but
15511 that was somewhat fragile and broke with post-reload splitters. */
15512 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15513 output_asm_insn ("fld\t%y1", operands
);
15515 gcc_assert (STACK_TOP_P (operands
[1]));
15516 gcc_assert (MEM_P (operands
[0]));
15517 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15520 output_asm_insn ("fisttp%Z0\t%0", operands
);
15523 if (round_mode
!= I387_CW_ANY
)
15524 output_asm_insn ("fldcw\t%3", operands
);
15525 if (stack_top_dies
|| dimode_p
)
15526 output_asm_insn ("fistp%Z0\t%0", operands
);
15528 output_asm_insn ("fist%Z0\t%0", operands
);
15529 if (round_mode
!= I387_CW_ANY
)
15530 output_asm_insn ("fldcw\t%2", operands
);
15536 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15537 have the values zero or one, indicates the ffreep insn's operand
15538 from the OPERANDS array. */
15540 static const char *
15541 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15543 if (TARGET_USE_FFREEP
)
15544 #ifdef HAVE_AS_IX86_FFREEP
15545 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15548 static char retval
[32];
15549 int regno
= REGNO (operands
[opno
]);
15551 gcc_assert (FP_REGNO_P (regno
));
15553 regno
-= FIRST_STACK_REG
;
15555 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15560 return opno
? "fstp\t%y1" : "fstp\t%y0";
15564 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15565 should be used. UNORDERED_P is true when fucom should be used. */
15568 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15570 int stack_top_dies
;
15571 rtx cmp_op0
, cmp_op1
;
15572 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15576 cmp_op0
= operands
[0];
15577 cmp_op1
= operands
[1];
15581 cmp_op0
= operands
[1];
15582 cmp_op1
= operands
[2];
15587 if (GET_MODE (operands
[0]) == SFmode
)
15589 return "%vucomiss\t{%1, %0|%0, %1}";
15591 return "%vcomiss\t{%1, %0|%0, %1}";
15594 return "%vucomisd\t{%1, %0|%0, %1}";
15596 return "%vcomisd\t{%1, %0|%0, %1}";
15599 gcc_assert (STACK_TOP_P (cmp_op0
));
15601 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15603 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15605 if (stack_top_dies
)
15607 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15608 return output_387_ffreep (operands
, 1);
15611 return "ftst\n\tfnstsw\t%0";
15614 if (STACK_REG_P (cmp_op1
)
15616 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15617 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15619 /* If both the top of the 387 stack dies, and the other operand
15620 is also a stack register that dies, then this must be a
15621 `fcompp' float compare */
15625 /* There is no double popping fcomi variant. Fortunately,
15626 eflags is immune from the fstp's cc clobbering. */
15628 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15630 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15631 return output_387_ffreep (operands
, 0);
15636 return "fucompp\n\tfnstsw\t%0";
15638 return "fcompp\n\tfnstsw\t%0";
15643 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15645 static const char * const alt
[16] =
15647 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15648 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15649 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15650 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15652 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15653 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15657 "fcomi\t{%y1, %0|%0, %y1}",
15658 "fcomip\t{%y1, %0|%0, %y1}",
15659 "fucomi\t{%y1, %0|%0, %y1}",
15660 "fucomip\t{%y1, %0|%0, %y1}",
15671 mask
= eflags_p
<< 3;
15672 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15673 mask
|= unordered_p
<< 1;
15674 mask
|= stack_top_dies
;
15676 gcc_assert (mask
< 16);
15685 ix86_output_addr_vec_elt (FILE *file
, int value
)
15687 const char *directive
= ASM_LONG
;
15691 directive
= ASM_QUAD
;
15693 gcc_assert (!TARGET_64BIT
);
15696 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15700 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15702 const char *directive
= ASM_LONG
;
15705 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15706 directive
= ASM_QUAD
;
15708 gcc_assert (!TARGET_64BIT
);
15710 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15711 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15712 fprintf (file
, "%s%s%d-%s%d\n",
15713 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15714 else if (HAVE_AS_GOTOFF_IN_DATA
)
15715 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15717 else if (TARGET_MACHO
)
15719 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15720 machopic_output_function_base_name (file
);
15725 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15726 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15729 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15733 ix86_expand_clear (rtx dest
)
15737 /* We play register width games, which are only valid after reload. */
15738 gcc_assert (reload_completed
);
15740 /* Avoid HImode and its attendant prefix byte. */
15741 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15742 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15743 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15745 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15746 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15748 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15749 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15755 /* X is an unchanging MEM. If it is a constant pool reference, return
15756 the constant pool rtx, else NULL. */
15759 maybe_get_pool_constant (rtx x
)
15761 x
= ix86_delegitimize_address (XEXP (x
, 0));
15763 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15764 return get_pool_constant (x
);
15770 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15773 enum tls_model model
;
15778 if (GET_CODE (op1
) == SYMBOL_REF
)
15780 model
= SYMBOL_REF_TLS_MODEL (op1
);
15783 op1
= legitimize_tls_address (op1
, model
, true);
15784 op1
= force_operand (op1
, op0
);
15787 if (GET_MODE (op1
) != mode
)
15788 op1
= convert_to_mode (mode
, op1
, 1);
15790 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15791 && SYMBOL_REF_DLLIMPORT_P (op1
))
15792 op1
= legitimize_dllimport_symbol (op1
, false);
15794 else if (GET_CODE (op1
) == CONST
15795 && GET_CODE (XEXP (op1
, 0)) == PLUS
15796 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15798 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15799 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15802 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15804 tmp
= legitimize_tls_address (symbol
, model
, true);
15805 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15806 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15807 tmp
= legitimize_dllimport_symbol (symbol
, true);
15811 tmp
= force_operand (tmp
, NULL
);
15812 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15813 op0
, 1, OPTAB_DIRECT
);
15816 if (GET_MODE (tmp
) != mode
)
15817 op1
= convert_to_mode (mode
, tmp
, 1);
15821 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15822 && symbolic_operand (op1
, mode
))
15824 if (TARGET_MACHO
&& !TARGET_64BIT
)
15827 /* dynamic-no-pic */
15828 if (MACHOPIC_INDIRECT
)
15830 rtx temp
= ((reload_in_progress
15831 || ((op0
&& REG_P (op0
))
15833 ? op0
: gen_reg_rtx (Pmode
));
15834 op1
= machopic_indirect_data_reference (op1
, temp
);
15836 op1
= machopic_legitimize_pic_address (op1
, mode
,
15837 temp
== op1
? 0 : temp
);
15839 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15841 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15845 if (GET_CODE (op0
) == MEM
)
15846 op1
= force_reg (Pmode
, op1
);
15850 if (GET_CODE (temp
) != REG
)
15851 temp
= gen_reg_rtx (Pmode
);
15852 temp
= legitimize_pic_address (op1
, temp
);
15857 /* dynamic-no-pic */
15863 op1
= force_reg (mode
, op1
);
15864 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15866 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15867 op1
= legitimize_pic_address (op1
, reg
);
15870 if (GET_MODE (op1
) != mode
)
15871 op1
= convert_to_mode (mode
, op1
, 1);
15878 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15879 || !push_operand (op0
, mode
))
15881 op1
= force_reg (mode
, op1
);
15883 if (push_operand (op0
, mode
)
15884 && ! general_no_elim_operand (op1
, mode
))
15885 op1
= copy_to_mode_reg (mode
, op1
);
15887 /* Force large constants in 64bit compilation into register
15888 to get them CSEed. */
15889 if (can_create_pseudo_p ()
15890 && (mode
== DImode
) && TARGET_64BIT
15891 && immediate_operand (op1
, mode
)
15892 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15893 && !register_operand (op0
, mode
)
15895 op1
= copy_to_mode_reg (mode
, op1
);
15897 if (can_create_pseudo_p ()
15898 && FLOAT_MODE_P (mode
)
15899 && GET_CODE (op1
) == CONST_DOUBLE
)
15901 /* If we are loading a floating point constant to a register,
15902 force the value to memory now, since we'll get better code
15903 out the back end. */
15905 op1
= validize_mem (force_const_mem (mode
, op1
));
15906 if (!register_operand (op0
, mode
))
15908 rtx temp
= gen_reg_rtx (mode
);
15909 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15910 emit_move_insn (op0
, temp
);
15916 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15920 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15922 rtx op0
= operands
[0], op1
= operands
[1];
15923 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15925 /* Force constants other than zero into memory. We do not know how
15926 the instructions used to build constants modify the upper 64 bits
15927 of the register, once we have that information we may be able
15928 to handle some of them more efficiently. */
15929 if (can_create_pseudo_p ()
15930 && register_operand (op0
, mode
)
15931 && (CONSTANT_P (op1
)
15932 || (GET_CODE (op1
) == SUBREG
15933 && CONSTANT_P (SUBREG_REG (op1
))))
15934 && !standard_sse_constant_p (op1
))
15935 op1
= validize_mem (force_const_mem (mode
, op1
));
15937 /* We need to check memory alignment for SSE mode since attribute
15938 can make operands unaligned. */
15939 if (can_create_pseudo_p ()
15940 && SSE_REG_MODE_P (mode
)
15941 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15942 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15946 /* ix86_expand_vector_move_misalign() does not like constants ... */
15947 if (CONSTANT_P (op1
)
15948 || (GET_CODE (op1
) == SUBREG
15949 && CONSTANT_P (SUBREG_REG (op1
))))
15950 op1
= validize_mem (force_const_mem (mode
, op1
));
15952 /* ... nor both arguments in memory. */
15953 if (!register_operand (op0
, mode
)
15954 && !register_operand (op1
, mode
))
15955 op1
= force_reg (mode
, op1
);
15957 tmp
[0] = op0
; tmp
[1] = op1
;
15958 ix86_expand_vector_move_misalign (mode
, tmp
);
15962 /* Make operand1 a register if it isn't already. */
15963 if (can_create_pseudo_p ()
15964 && !register_operand (op0
, mode
)
15965 && !register_operand (op1
, mode
))
15967 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15971 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15974 /* Split 32-byte AVX unaligned load and store if needed. */
15977 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
15980 rtx (*extract
) (rtx
, rtx
, rtx
);
15981 rtx (*move_unaligned
) (rtx
, rtx
);
15982 enum machine_mode mode
;
15984 switch (GET_MODE (op0
))
15987 gcc_unreachable ();
15989 extract
= gen_avx_vextractf128v32qi
;
15990 move_unaligned
= gen_avx_movdqu256
;
15994 extract
= gen_avx_vextractf128v8sf
;
15995 move_unaligned
= gen_avx_movups256
;
15999 extract
= gen_avx_vextractf128v4df
;
16000 move_unaligned
= gen_avx_movupd256
;
16005 if (MEM_P (op1
) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16007 rtx r
= gen_reg_rtx (mode
);
16008 m
= adjust_address (op1
, mode
, 0);
16009 emit_move_insn (r
, m
);
16010 m
= adjust_address (op1
, mode
, 16);
16011 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16012 emit_move_insn (op0
, r
);
16014 else if (MEM_P (op0
) && TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16016 m
= adjust_address (op0
, mode
, 0);
16017 emit_insn (extract (m
, op1
, const0_rtx
));
16018 m
= adjust_address (op0
, mode
, 16);
16019 emit_insn (extract (m
, op1
, const1_rtx
));
16022 emit_insn (move_unaligned (op0
, op1
));
16025 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16026 straight to ix86_expand_vector_move. */
16027 /* Code generation for scalar reg-reg moves of single and double precision data:
16028 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16032 if (x86_sse_partial_reg_dependency == true)
16037 Code generation for scalar loads of double precision data:
16038 if (x86_sse_split_regs == true)
16039 movlpd mem, reg (gas syntax)
16043 Code generation for unaligned packed loads of single precision data
16044 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16045 if (x86_sse_unaligned_move_optimal)
16048 if (x86_sse_partial_reg_dependency == true)
16060 Code generation for unaligned packed loads of double precision data
16061 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16062 if (x86_sse_unaligned_move_optimal)
16065 if (x86_sse_split_regs == true)
16078 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16086 && GET_MODE_SIZE (mode
) == 32)
16088 switch (GET_MODE_CLASS (mode
))
16090 case MODE_VECTOR_INT
:
16092 op0
= gen_lowpart (V32QImode
, op0
);
16093 op1
= gen_lowpart (V32QImode
, op1
);
16096 case MODE_VECTOR_FLOAT
:
16097 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16101 gcc_unreachable ();
16109 /* ??? If we have typed data, then it would appear that using
16110 movdqu is the only way to get unaligned data loaded with
16112 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16114 op0
= gen_lowpart (V16QImode
, op0
);
16115 op1
= gen_lowpart (V16QImode
, op1
);
16116 /* We will eventually emit movups based on insn attributes. */
16117 emit_insn (gen_sse2_movdqu (op0
, op1
));
16119 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16124 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16125 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16126 || optimize_function_for_size_p (cfun
))
16128 /* We will eventually emit movups based on insn attributes. */
16129 emit_insn (gen_sse2_movupd (op0
, op1
));
16133 /* When SSE registers are split into halves, we can avoid
16134 writing to the top half twice. */
16135 if (TARGET_SSE_SPLIT_REGS
)
16137 emit_clobber (op0
);
16142 /* ??? Not sure about the best option for the Intel chips.
16143 The following would seem to satisfy; the register is
16144 entirely cleared, breaking the dependency chain. We
16145 then store to the upper half, with a dependency depth
16146 of one. A rumor has it that Intel recommends two movsd
16147 followed by an unpacklpd, but this is unconfirmed. And
16148 given that the dependency depth of the unpacklpd would
16149 still be one, I'm not sure why this would be better. */
16150 zero
= CONST0_RTX (V2DFmode
);
16153 m
= adjust_address (op1
, DFmode
, 0);
16154 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16155 m
= adjust_address (op1
, DFmode
, 8);
16156 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16161 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16162 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16163 || optimize_function_for_size_p (cfun
))
16165 op0
= gen_lowpart (V4SFmode
, op0
);
16166 op1
= gen_lowpart (V4SFmode
, op1
);
16167 emit_insn (gen_sse_movups (op0
, op1
));
16171 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16172 emit_move_insn (op0
, CONST0_RTX (mode
));
16174 emit_clobber (op0
);
16176 if (mode
!= V4SFmode
)
16177 op0
= gen_lowpart (V4SFmode
, op0
);
16179 m
= adjust_address (op1
, V2SFmode
, 0);
16180 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16181 m
= adjust_address (op1
, V2SFmode
, 8);
16182 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16185 else if (MEM_P (op0
))
16187 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16189 op0
= gen_lowpart (V16QImode
, op0
);
16190 op1
= gen_lowpart (V16QImode
, op1
);
16191 /* We will eventually emit movups based on insn attributes. */
16192 emit_insn (gen_sse2_movdqu (op0
, op1
));
16194 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16197 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16198 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16199 || optimize_function_for_size_p (cfun
))
16200 /* We will eventually emit movups based on insn attributes. */
16201 emit_insn (gen_sse2_movupd (op0
, op1
));
16204 m
= adjust_address (op0
, DFmode
, 0);
16205 emit_insn (gen_sse2_storelpd (m
, op1
));
16206 m
= adjust_address (op0
, DFmode
, 8);
16207 emit_insn (gen_sse2_storehpd (m
, op1
));
16212 if (mode
!= V4SFmode
)
16213 op1
= gen_lowpart (V4SFmode
, op1
);
16216 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16217 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16218 || optimize_function_for_size_p (cfun
))
16220 op0
= gen_lowpart (V4SFmode
, op0
);
16221 emit_insn (gen_sse_movups (op0
, op1
));
16225 m
= adjust_address (op0
, V2SFmode
, 0);
16226 emit_insn (gen_sse_storelps (m
, op1
));
16227 m
= adjust_address (op0
, V2SFmode
, 8);
16228 emit_insn (gen_sse_storehps (m
, op1
));
16233 gcc_unreachable ();
16236 /* Expand a push in MODE. This is some mode for which we do not support
16237 proper push instructions, at least from the registers that we expect
16238 the value to live in. */
16241 ix86_expand_push (enum machine_mode mode
, rtx x
)
16245 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16246 GEN_INT (-GET_MODE_SIZE (mode
)),
16247 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16248 if (tmp
!= stack_pointer_rtx
)
16249 emit_move_insn (stack_pointer_rtx
, tmp
);
16251 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16253 /* When we push an operand onto stack, it has to be aligned at least
16254 at the function argument boundary. However since we don't have
16255 the argument type, we can't determine the actual argument
16257 emit_move_insn (tmp
, x
);
16260 /* Helper function of ix86_fixup_binary_operands to canonicalize
16261 operand order. Returns true if the operands should be swapped. */
16264 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16267 rtx dst
= operands
[0];
16268 rtx src1
= operands
[1];
16269 rtx src2
= operands
[2];
16271 /* If the operation is not commutative, we can't do anything. */
16272 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16275 /* Highest priority is that src1 should match dst. */
16276 if (rtx_equal_p (dst
, src1
))
16278 if (rtx_equal_p (dst
, src2
))
16281 /* Next highest priority is that immediate constants come second. */
16282 if (immediate_operand (src2
, mode
))
16284 if (immediate_operand (src1
, mode
))
16287 /* Lowest priority is that memory references should come second. */
16297 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16298 destination to use for the operation. If different from the true
16299 destination in operands[0], a copy operation will be required. */
16302 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16305 rtx dst
= operands
[0];
16306 rtx src1
= operands
[1];
16307 rtx src2
= operands
[2];
16309 /* Canonicalize operand order. */
16310 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16314 /* It is invalid to swap operands of different modes. */
16315 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16322 /* Both source operands cannot be in memory. */
16323 if (MEM_P (src1
) && MEM_P (src2
))
16325 /* Optimization: Only read from memory once. */
16326 if (rtx_equal_p (src1
, src2
))
16328 src2
= force_reg (mode
, src2
);
16332 src2
= force_reg (mode
, src2
);
16335 /* If the destination is memory, and we do not have matching source
16336 operands, do things in registers. */
16337 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16338 dst
= gen_reg_rtx (mode
);
16340 /* Source 1 cannot be a constant. */
16341 if (CONSTANT_P (src1
))
16342 src1
= force_reg (mode
, src1
);
16344 /* Source 1 cannot be a non-matching memory. */
16345 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16346 src1
= force_reg (mode
, src1
);
16348 /* Improve address combine. */
16350 && GET_MODE_CLASS (mode
) == MODE_INT
16352 src2
= force_reg (mode
, src2
);
16354 operands
[1] = src1
;
16355 operands
[2] = src2
;
16359 /* Similarly, but assume that the destination has already been
16360 set up properly. */
16363 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16364 enum machine_mode mode
, rtx operands
[])
16366 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16367 gcc_assert (dst
== operands
[0]);
16370 /* Attempt to expand a binary operator. Make the expansion closer to the
16371 actual machine, then just general_operand, which will allow 3 separate
16372 memory references (one output, two input) in a single insn. */
16375 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16378 rtx src1
, src2
, dst
, op
, clob
;
16380 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16381 src1
= operands
[1];
16382 src2
= operands
[2];
16384 /* Emit the instruction. */
16386 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16387 if (reload_in_progress
)
16389 /* Reload doesn't know about the flags register, and doesn't know that
16390 it doesn't want to clobber it. We can only do this with PLUS. */
16391 gcc_assert (code
== PLUS
);
16394 else if (reload_completed
16396 && !rtx_equal_p (dst
, src1
))
16398 /* This is going to be an LEA; avoid splitting it later. */
16403 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16404 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16407 /* Fix up the destination if needed. */
16408 if (dst
!= operands
[0])
16409 emit_move_insn (operands
[0], dst
);
16412 /* Return TRUE or FALSE depending on whether the binary operator meets the
16413 appropriate constraints. */
16416 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16419 rtx dst
= operands
[0];
16420 rtx src1
= operands
[1];
16421 rtx src2
= operands
[2];
16423 /* Both source operands cannot be in memory. */
16424 if (MEM_P (src1
) && MEM_P (src2
))
16427 /* Canonicalize operand order for commutative operators. */
16428 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16435 /* If the destination is memory, we must have a matching source operand. */
16436 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16439 /* Source 1 cannot be a constant. */
16440 if (CONSTANT_P (src1
))
16443 /* Source 1 cannot be a non-matching memory. */
16444 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16445 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16446 return (code
== AND
16449 || (TARGET_64BIT
&& mode
== DImode
))
16450 && satisfies_constraint_L (src2
));
16455 /* Attempt to expand a unary operator. Make the expansion closer to the
16456 actual machine, then just general_operand, which will allow 2 separate
16457 memory references (one output, one input) in a single insn. */
16460 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16463 int matching_memory
;
16464 rtx src
, dst
, op
, clob
;
16469 /* If the destination is memory, and we do not have matching source
16470 operands, do things in registers. */
16471 matching_memory
= 0;
16474 if (rtx_equal_p (dst
, src
))
16475 matching_memory
= 1;
16477 dst
= gen_reg_rtx (mode
);
16480 /* When source operand is memory, destination must match. */
16481 if (MEM_P (src
) && !matching_memory
)
16482 src
= force_reg (mode
, src
);
16484 /* Emit the instruction. */
16486 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16487 if (reload_in_progress
|| code
== NOT
)
16489 /* Reload doesn't know about the flags register, and doesn't know that
16490 it doesn't want to clobber it. */
16491 gcc_assert (code
== NOT
);
16496 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16497 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16500 /* Fix up the destination if needed. */
16501 if (dst
!= operands
[0])
16502 emit_move_insn (operands
[0], dst
);
16505 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16506 divisor are within the range [0-255]. */
16509 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16512 rtx end_label
, qimode_label
;
16513 rtx insn
, div
, mod
;
16514 rtx scratch
, tmp0
, tmp1
, tmp2
;
16515 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16516 rtx (*gen_zero_extend
) (rtx
, rtx
);
16517 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16522 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16523 gen_test_ccno_1
= gen_testsi_ccno_1
;
16524 gen_zero_extend
= gen_zero_extendqisi2
;
16527 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16528 gen_test_ccno_1
= gen_testdi_ccno_1
;
16529 gen_zero_extend
= gen_zero_extendqidi2
;
16532 gcc_unreachable ();
16535 end_label
= gen_label_rtx ();
16536 qimode_label
= gen_label_rtx ();
16538 scratch
= gen_reg_rtx (mode
);
16540 /* Use 8bit unsigned divimod if dividend and divisor are within
16541 the range [0-255]. */
16542 emit_move_insn (scratch
, operands
[2]);
16543 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16544 scratch
, 1, OPTAB_DIRECT
);
16545 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16546 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16547 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16548 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16549 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16551 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16552 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16553 JUMP_LABEL (insn
) = qimode_label
;
16555 /* Generate original signed/unsigned divimod. */
16556 div
= gen_divmod4_1 (operands
[0], operands
[1],
16557 operands
[2], operands
[3]);
16560 /* Branch to the end. */
16561 emit_jump_insn (gen_jump (end_label
));
16564 /* Generate 8bit unsigned divide. */
16565 emit_label (qimode_label
);
16566 /* Don't use operands[0] for result of 8bit divide since not all
16567 registers support QImode ZERO_EXTRACT. */
16568 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16569 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16570 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16571 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16575 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16576 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16580 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16581 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16584 /* Extract remainder from AH. */
16585 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16586 if (REG_P (operands
[1]))
16587 insn
= emit_move_insn (operands
[1], tmp1
);
16590 /* Need a new scratch register since the old one has result
16592 scratch
= gen_reg_rtx (mode
);
16593 emit_move_insn (scratch
, tmp1
);
16594 insn
= emit_move_insn (operands
[1], scratch
);
16596 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16598 /* Zero extend quotient from AL. */
16599 tmp1
= gen_lowpart (QImode
, tmp0
);
16600 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16601 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16603 emit_label (end_label
);
16606 #define LEA_MAX_STALL (3)
16607 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16609 /* Increase given DISTANCE in half-cycles according to
16610 dependencies between PREV and NEXT instructions.
16611 Add 1 half-cycle if there is no dependency and
16612 go to next cycle if there is some dependecy. */
16614 static unsigned int
16615 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16620 if (!prev
|| !next
)
16621 return distance
+ (distance
& 1) + 2;
16623 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16624 return distance
+ 1;
16626 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16627 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16628 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16629 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16630 return distance
+ (distance
& 1) + 2;
16632 return distance
+ 1;
16635 /* Function checks if instruction INSN defines register number
16636 REGNO1 or REGNO2. */
16639 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16644 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16645 if (DF_REF_REG_DEF_P (*def_rec
)
16646 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16647 && (regno1
== DF_REF_REGNO (*def_rec
)
16648 || regno2
== DF_REF_REGNO (*def_rec
)))
16656 /* Function checks if instruction INSN uses register number
16657 REGNO as a part of address expression. */
16660 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16664 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16665 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16671 /* Search backward for non-agu definition of register number REGNO1
16672 or register number REGNO2 in basic block starting from instruction
16673 START up to head of basic block or instruction INSN.
16675 Function puts true value into *FOUND var if definition was found
16676 and false otherwise.
16678 Distance in half-cycles between START and found instruction or head
16679 of BB is added to DISTANCE and returned. */
16682 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16683 rtx insn
, int distance
,
16684 rtx start
, bool *found
)
16686 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16694 && distance
< LEA_SEARCH_THRESHOLD
)
16696 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16698 distance
= increase_distance (prev
, next
, distance
);
16699 if (insn_defines_reg (regno1
, regno2
, prev
))
16701 if (recog_memoized (prev
) < 0
16702 || get_attr_type (prev
) != TYPE_LEA
)
16711 if (prev
== BB_HEAD (bb
))
16714 prev
= PREV_INSN (prev
);
16720 /* Search backward for non-agu definition of register number REGNO1
16721 or register number REGNO2 in INSN's basic block until
16722 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16723 2. Reach neighbour BBs boundary, or
16724 3. Reach agu definition.
16725 Returns the distance between the non-agu definition point and INSN.
16726 If no definition point, returns -1. */
16729 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16732 basic_block bb
= BLOCK_FOR_INSN (insn
);
16734 bool found
= false;
16736 if (insn
!= BB_HEAD (bb
))
16737 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16738 distance
, PREV_INSN (insn
),
16741 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16745 bool simple_loop
= false;
16747 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16750 simple_loop
= true;
16755 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16757 BB_END (bb
), &found
);
16760 int shortest_dist
= -1;
16761 bool found_in_bb
= false;
16763 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16766 = distance_non_agu_define_in_bb (regno1
, regno2
,
16772 if (shortest_dist
< 0)
16773 shortest_dist
= bb_dist
;
16774 else if (bb_dist
> 0)
16775 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16781 distance
= shortest_dist
;
16785 /* get_attr_type may modify recog data. We want to make sure
16786 that recog data is valid for instruction INSN, on which
16787 distance_non_agu_define is called. INSN is unchanged here. */
16788 extract_insn_cached (insn
);
16793 return distance
>> 1;
16796 /* Return the distance in half-cycles between INSN and the next
16797 insn that uses register number REGNO in memory address added
16798 to DISTANCE. Return -1 if REGNO0 is set.
16800 Put true value into *FOUND if register usage was found and
16802 Put true value into *REDEFINED if register redefinition was
16803 found and false otherwise. */
16806 distance_agu_use_in_bb (unsigned int regno
,
16807 rtx insn
, int distance
, rtx start
,
16808 bool *found
, bool *redefined
)
16810 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16815 *redefined
= false;
16819 && distance
< LEA_SEARCH_THRESHOLD
)
16821 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
16823 distance
= increase_distance(prev
, next
, distance
);
16824 if (insn_uses_reg_mem (regno
, next
))
16826 /* Return DISTANCE if OP0 is used in memory
16827 address in NEXT. */
16832 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
16834 /* Return -1 if OP0 is set in NEXT. */
16842 if (next
== BB_END (bb
))
16845 next
= NEXT_INSN (next
);
16851 /* Return the distance between INSN and the next insn that uses
16852 register number REGNO0 in memory address. Return -1 if no such
16853 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16856 distance_agu_use (unsigned int regno0
, rtx insn
)
16858 basic_block bb
= BLOCK_FOR_INSN (insn
);
16860 bool found
= false;
16861 bool redefined
= false;
16863 if (insn
!= BB_END (bb
))
16864 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
16866 &found
, &redefined
);
16868 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
16872 bool simple_loop
= false;
16874 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16877 simple_loop
= true;
16882 distance
= distance_agu_use_in_bb (regno0
, insn
,
16883 distance
, BB_HEAD (bb
),
16884 &found
, &redefined
);
16887 int shortest_dist
= -1;
16888 bool found_in_bb
= false;
16889 bool redefined_in_bb
= false;
16891 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16894 = distance_agu_use_in_bb (regno0
, insn
,
16895 distance
, BB_HEAD (e
->dest
),
16896 &found_in_bb
, &redefined_in_bb
);
16899 if (shortest_dist
< 0)
16900 shortest_dist
= bb_dist
;
16901 else if (bb_dist
> 0)
16902 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16908 distance
= shortest_dist
;
16912 if (!found
|| redefined
)
16915 return distance
>> 1;
16918 /* Define this macro to tune LEA priority vs ADD, it take effect when
16919 there is a dilemma of choicing LEA or ADD
16920 Negative value: ADD is more preferred than LEA
16922 Positive value: LEA is more preferred than ADD*/
16923 #define IX86_LEA_PRIORITY 0
16925 /* Return true if usage of lea INSN has performance advantage
16926 over a sequence of instructions. Instructions sequence has
16927 SPLIT_COST cycles higher latency than lea latency. */
16930 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
16931 unsigned int regno2
, int split_cost
)
16933 int dist_define
, dist_use
;
16935 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
16936 dist_use
= distance_agu_use (regno0
, insn
);
16938 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
16940 /* If there is no non AGU operand definition, no AGU
16941 operand usage and split cost is 0 then both lea
16942 and non lea variants have same priority. Currently
16943 we prefer lea for 64 bit code and non lea on 32 bit
16945 if (dist_use
< 0 && split_cost
== 0)
16946 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
16951 /* With longer definitions distance lea is more preferable.
16952 Here we change it to take into account splitting cost and
16954 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
16956 /* If there is no use in memory addess then we just check
16957 that split cost exceeds AGU stall. */
16959 return dist_define
> LEA_MAX_STALL
;
16961 /* If this insn has both backward non-agu dependence and forward
16962 agu dependence, the one with short distance takes effect. */
16963 return dist_define
>= dist_use
;
16966 /* Return true if it is legal to clobber flags by INSN and
16967 false otherwise. */
16970 ix86_ok_to_clobber_flags (rtx insn
)
16972 basic_block bb
= BLOCK_FOR_INSN (insn
);
16978 if (NONDEBUG_INSN_P (insn
))
16980 for (use
= DF_INSN_USES (insn
); *use
; use
++)
16981 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
16984 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
16988 if (insn
== BB_END (bb
))
16991 insn
= NEXT_INSN (insn
);
16994 live
= df_get_live_out(bb
);
16995 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
16998 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16999 move and add to avoid AGU stalls. */
17002 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17004 unsigned int regno0
, regno1
, regno2
;
17006 /* Check if we need to optimize. */
17007 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17010 /* Check it is correct to split here. */
17011 if (!ix86_ok_to_clobber_flags(insn
))
17014 regno0
= true_regnum (operands
[0]);
17015 regno1
= true_regnum (operands
[1]);
17016 regno2
= true_regnum (operands
[2]);
17018 /* We need to split only adds with non destructive
17019 destination operand. */
17020 if (regno0
== regno1
|| regno0
== regno2
)
17023 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17026 /* Return true if we should emit lea instruction instead of mov
17030 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17032 unsigned int regno0
, regno1
;
17034 /* Check if we need to optimize. */
17035 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17038 /* Use lea for reg to reg moves only. */
17039 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17042 regno0
= true_regnum (operands
[0]);
17043 regno1
= true_regnum (operands
[1]);
17045 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17048 /* Return true if we need to split lea into a sequence of
17049 instructions to avoid AGU stalls. */
17052 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17054 unsigned int regno0
, regno1
, regno2
;
17056 struct ix86_address parts
;
17059 /* Check we need to optimize. */
17060 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17063 /* Check it is correct to split here. */
17064 if (!ix86_ok_to_clobber_flags(insn
))
17067 ok
= ix86_decompose_address (operands
[1], &parts
);
17070 /* There should be at least two components in the address. */
17071 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17072 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17075 /* We should not split into add if non legitimate pic
17076 operand is used as displacement. */
17077 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17080 regno0
= true_regnum (operands
[0]) ;
17081 regno1
= INVALID_REGNUM
;
17082 regno2
= INVALID_REGNUM
;
17085 regno1
= true_regnum (parts
.base
);
17087 regno2
= true_regnum (parts
.index
);
17091 /* Compute how many cycles we will add to execution time
17092 if split lea into a sequence of instructions. */
17093 if (parts
.base
|| parts
.index
)
17095 /* Have to use mov instruction if non desctructive
17096 destination form is used. */
17097 if (regno1
!= regno0
&& regno2
!= regno0
)
17100 /* Have to add index to base if both exist. */
17101 if (parts
.base
&& parts
.index
)
17104 /* Have to use shift and adds if scale is 2 or greater. */
17105 if (parts
.scale
> 1)
17107 if (regno0
!= regno1
)
17109 else if (regno2
== regno0
)
17112 split_cost
+= parts
.scale
;
17115 /* Have to use add instruction with immediate if
17116 disp is non zero. */
17117 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17120 /* Subtract the price of lea. */
17124 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17127 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17128 matches destination. RTX includes clobber of FLAGS_REG. */
17131 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17136 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17137 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17139 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17142 /* Return true if regno1 def is nearest to the insn. */
17145 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17148 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17152 while (prev
&& prev
!= start
)
17154 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17156 prev
= PREV_INSN (prev
);
17159 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17161 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17163 prev
= PREV_INSN (prev
);
17166 /* None of the regs is defined in the bb. */
17170 /* Split lea instructions into a sequence of instructions
17171 which are executed on ALU to avoid AGU stalls.
17172 It is assumed that it is allowed to clobber flags register
17173 at lea position. */
17176 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17178 unsigned int regno0
, regno1
, regno2
;
17179 struct ix86_address parts
;
17183 ok
= ix86_decompose_address (operands
[1], &parts
);
17186 target
= gen_lowpart (mode
, operands
[0]);
17188 regno0
= true_regnum (target
);
17189 regno1
= INVALID_REGNUM
;
17190 regno2
= INVALID_REGNUM
;
17194 parts
.base
= gen_lowpart (mode
, parts
.base
);
17195 regno1
= true_regnum (parts
.base
);
17200 parts
.index
= gen_lowpart (mode
, parts
.index
);
17201 regno2
= true_regnum (parts
.index
);
17205 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17207 if (parts
.scale
> 1)
17209 /* Case r1 = r1 + ... */
17210 if (regno1
== regno0
)
17212 /* If we have a case r1 = r1 + C * r1 then we
17213 should use multiplication which is very
17214 expensive. Assume cost model is wrong if we
17215 have such case here. */
17216 gcc_assert (regno2
!= regno0
);
17218 for (adds
= parts
.scale
; adds
> 0; adds
--)
17219 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17223 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17224 if (regno0
!= regno2
)
17225 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17227 /* Use shift for scaling. */
17228 ix86_emit_binop (ASHIFT
, mode
, target
,
17229 GEN_INT (exact_log2 (parts
.scale
)));
17232 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17234 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17235 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17238 else if (!parts
.base
&& !parts
.index
)
17240 gcc_assert(parts
.disp
);
17241 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17247 if (regno0
!= regno2
)
17248 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17250 else if (!parts
.index
)
17252 if (regno0
!= regno1
)
17253 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17257 if (regno0
== regno1
)
17259 else if (regno0
== regno2
)
17265 /* Find better operand for SET instruction, depending
17266 on which definition is farther from the insn. */
17267 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17268 tmp
= parts
.index
, tmp1
= parts
.base
;
17270 tmp
= parts
.base
, tmp1
= parts
.index
;
17272 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17274 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17275 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17277 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17281 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17284 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17285 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17289 /* Return true if it is ok to optimize an ADD operation to LEA
17290 operation to avoid flag register consumation. For most processors,
17291 ADD is faster than LEA. For the processors like ATOM, if the
17292 destination register of LEA holds an actual address which will be
17293 used soon, LEA is better and otherwise ADD is better. */
17296 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17298 unsigned int regno0
= true_regnum (operands
[0]);
17299 unsigned int regno1
= true_regnum (operands
[1]);
17300 unsigned int regno2
= true_regnum (operands
[2]);
17302 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17303 if (regno0
!= regno1
&& regno0
!= regno2
)
17306 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17309 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17312 /* Return true if destination reg of SET_BODY is shift count of
17316 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17322 /* Retrieve destination of SET_BODY. */
17323 switch (GET_CODE (set_body
))
17326 set_dest
= SET_DEST (set_body
);
17327 if (!set_dest
|| !REG_P (set_dest
))
17331 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17332 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17340 /* Retrieve shift count of USE_BODY. */
17341 switch (GET_CODE (use_body
))
17344 shift_rtx
= XEXP (use_body
, 1);
17347 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17348 if (ix86_dep_by_shift_count_body (set_body
,
17349 XVECEXP (use_body
, 0, i
)))
17357 && (GET_CODE (shift_rtx
) == ASHIFT
17358 || GET_CODE (shift_rtx
) == LSHIFTRT
17359 || GET_CODE (shift_rtx
) == ASHIFTRT
17360 || GET_CODE (shift_rtx
) == ROTATE
17361 || GET_CODE (shift_rtx
) == ROTATERT
))
17363 rtx shift_count
= XEXP (shift_rtx
, 1);
17365 /* Return true if shift count is dest of SET_BODY. */
17366 if (REG_P (shift_count
)
17367 && true_regnum (set_dest
) == true_regnum (shift_count
))
17374 /* Return true if destination reg of SET_INSN is shift count of
17378 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17380 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17381 PATTERN (use_insn
));
17384 /* Return TRUE or FALSE depending on whether the unary operator meets the
17385 appropriate constraints. */
17388 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17389 enum machine_mode mode ATTRIBUTE_UNUSED
,
17390 rtx operands
[2] ATTRIBUTE_UNUSED
)
17392 /* If one of operands is memory, source and destination must match. */
17393 if ((MEM_P (operands
[0])
17394 || MEM_P (operands
[1]))
17395 && ! rtx_equal_p (operands
[0], operands
[1]))
17400 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17401 are ok, keeping in mind the possible movddup alternative. */
17404 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17406 if (MEM_P (operands
[0]))
17407 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17408 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17409 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17413 /* Post-reload splitter for converting an SF or DFmode value in an
17414 SSE register into an unsigned SImode. */
17417 ix86_split_convert_uns_si_sse (rtx operands
[])
17419 enum machine_mode vecmode
;
17420 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17422 large
= operands
[1];
17423 zero_or_two31
= operands
[2];
17424 input
= operands
[3];
17425 two31
= operands
[4];
17426 vecmode
= GET_MODE (large
);
17427 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17429 /* Load up the value into the low element. We must ensure that the other
17430 elements are valid floats -- zero is the easiest such value. */
17433 if (vecmode
== V4SFmode
)
17434 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17436 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17440 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17441 emit_move_insn (value
, CONST0_RTX (vecmode
));
17442 if (vecmode
== V4SFmode
)
17443 emit_insn (gen_sse_movss (value
, value
, input
));
17445 emit_insn (gen_sse2_movsd (value
, value
, input
));
17448 emit_move_insn (large
, two31
);
17449 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17451 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17452 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17454 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17455 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17457 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17458 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17460 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17461 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17463 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17464 if (vecmode
== V4SFmode
)
17465 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17467 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17470 emit_insn (gen_xorv4si3 (value
, value
, large
));
17473 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17474 Expects the 64-bit DImode to be supplied in a pair of integral
17475 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17476 -mfpmath=sse, !optimize_size only. */
17479 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17481 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17482 rtx int_xmm
, fp_xmm
;
17483 rtx biases
, exponents
;
17486 int_xmm
= gen_reg_rtx (V4SImode
);
17487 if (TARGET_INTER_UNIT_MOVES
)
17488 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17489 else if (TARGET_SSE_SPLIT_REGS
)
17491 emit_clobber (int_xmm
);
17492 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17496 x
= gen_reg_rtx (V2DImode
);
17497 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17498 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17501 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17502 gen_rtvec (4, GEN_INT (0x43300000UL
),
17503 GEN_INT (0x45300000UL
),
17504 const0_rtx
, const0_rtx
));
17505 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17507 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17508 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17510 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17511 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17512 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17513 (0x1.0p84 + double(fp_value_hi_xmm)).
17514 Note these exponents differ by 32. */
17516 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17518 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17519 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17520 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17521 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17522 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17523 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17524 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17525 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17526 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17528 /* Add the upper and lower DFmode values together. */
17530 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17533 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17534 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17535 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17538 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17541 /* Not used, but eases macroization of patterns. */
17543 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17544 rtx input ATTRIBUTE_UNUSED
)
17546 gcc_unreachable ();
17549 /* Convert an unsigned SImode value into a DFmode. Only currently used
17550 for SSE, but applicable anywhere. */
17553 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17555 REAL_VALUE_TYPE TWO31r
;
17558 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17559 NULL
, 1, OPTAB_DIRECT
);
17561 fp
= gen_reg_rtx (DFmode
);
17562 emit_insn (gen_floatsidf2 (fp
, x
));
17564 real_ldexp (&TWO31r
, &dconst1
, 31);
17565 x
= const_double_from_real_value (TWO31r
, DFmode
);
17567 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17569 emit_move_insn (target
, x
);
17572 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17573 32-bit mode; otherwise we have a direct convert instruction. */
17576 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17578 REAL_VALUE_TYPE TWO32r
;
17579 rtx fp_lo
, fp_hi
, x
;
17581 fp_lo
= gen_reg_rtx (DFmode
);
17582 fp_hi
= gen_reg_rtx (DFmode
);
17584 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17586 real_ldexp (&TWO32r
, &dconst1
, 32);
17587 x
= const_double_from_real_value (TWO32r
, DFmode
);
17588 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17590 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17592 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17595 emit_move_insn (target
, x
);
17598 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17599 For x86_32, -mfpmath=sse, !optimize_size only. */
17601 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17603 REAL_VALUE_TYPE ONE16r
;
17604 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17606 real_ldexp (&ONE16r
, &dconst1
, 16);
17607 x
= const_double_from_real_value (ONE16r
, SFmode
);
17608 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17609 NULL
, 0, OPTAB_DIRECT
);
17610 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17611 NULL
, 0, OPTAB_DIRECT
);
17612 fp_hi
= gen_reg_rtx (SFmode
);
17613 fp_lo
= gen_reg_rtx (SFmode
);
17614 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17615 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17616 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17618 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17620 if (!rtx_equal_p (target
, fp_hi
))
17621 emit_move_insn (target
, fp_hi
);
17624 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17625 a vector of unsigned ints VAL to vector of floats TARGET. */
17628 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17631 REAL_VALUE_TYPE TWO16r
;
17632 enum machine_mode intmode
= GET_MODE (val
);
17633 enum machine_mode fltmode
= GET_MODE (target
);
17634 rtx (*cvt
) (rtx
, rtx
);
17636 if (intmode
== V4SImode
)
17637 cvt
= gen_floatv4siv4sf2
;
17639 cvt
= gen_floatv8siv8sf2
;
17640 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17641 tmp
[0] = force_reg (intmode
, tmp
[0]);
17642 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17644 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17645 NULL_RTX
, 1, OPTAB_DIRECT
);
17646 tmp
[3] = gen_reg_rtx (fltmode
);
17647 emit_insn (cvt (tmp
[3], tmp
[1]));
17648 tmp
[4] = gen_reg_rtx (fltmode
);
17649 emit_insn (cvt (tmp
[4], tmp
[2]));
17650 real_ldexp (&TWO16r
, &dconst1
, 16);
17651 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17652 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17653 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17655 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17657 if (tmp
[7] != target
)
17658 emit_move_insn (target
, tmp
[7]);
17661 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17662 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17663 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17664 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17667 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17669 REAL_VALUE_TYPE TWO31r
;
17670 rtx two31r
, tmp
[4];
17671 enum machine_mode mode
= GET_MODE (val
);
17672 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17673 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17674 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17677 for (i
= 0; i
< 3; i
++)
17678 tmp
[i
] = gen_reg_rtx (mode
);
17679 real_ldexp (&TWO31r
, &dconst1
, 31);
17680 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17681 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17682 two31r
= force_reg (mode
, two31r
);
17685 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17686 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17687 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17688 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17689 default: gcc_unreachable ();
17691 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17692 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17693 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17695 if (intmode
== V4SImode
|| TARGET_AVX2
)
17696 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17697 gen_lowpart (intmode
, tmp
[0]),
17698 GEN_INT (31), NULL_RTX
, 0,
17702 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17703 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17704 *xorp
= expand_simple_binop (intmode
, AND
,
17705 gen_lowpart (intmode
, tmp
[0]),
17706 two31
, NULL_RTX
, 0,
17709 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17713 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17714 then replicate the value for all elements of the vector
17718 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17722 enum machine_mode scalar_mode
;
17739 n_elt
= GET_MODE_NUNITS (mode
);
17740 v
= rtvec_alloc (n_elt
);
17741 scalar_mode
= GET_MODE_INNER (mode
);
17743 RTVEC_ELT (v
, 0) = value
;
17745 for (i
= 1; i
< n_elt
; ++i
)
17746 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17748 return gen_rtx_CONST_VECTOR (mode
, v
);
17751 gcc_unreachable ();
17755 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17756 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17757 for an SSE register. If VECT is true, then replicate the mask for
17758 all elements of the vector register. If INVERT is true, then create
17759 a mask excluding the sign bit. */
17762 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17764 enum machine_mode vec_mode
, imode
;
17765 HOST_WIDE_INT hi
, lo
;
17770 /* Find the sign bit, sign extended to 2*HWI. */
17778 mode
= GET_MODE_INNER (mode
);
17780 lo
= 0x80000000, hi
= lo
< 0;
17788 mode
= GET_MODE_INNER (mode
);
17790 if (HOST_BITS_PER_WIDE_INT
>= 64)
17791 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17793 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17798 vec_mode
= VOIDmode
;
17799 if (HOST_BITS_PER_WIDE_INT
>= 64)
17802 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
17809 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17813 lo
= ~lo
, hi
= ~hi
;
17819 mask
= immed_double_const (lo
, hi
, imode
);
17821 vec
= gen_rtvec (2, v
, mask
);
17822 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
17823 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
17830 gcc_unreachable ();
17834 lo
= ~lo
, hi
= ~hi
;
17836 /* Force this value into the low part of a fp vector constant. */
17837 mask
= immed_double_const (lo
, hi
, imode
);
17838 mask
= gen_lowpart (mode
, mask
);
17840 if (vec_mode
== VOIDmode
)
17841 return force_reg (mode
, mask
);
17843 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
17844 return force_reg (vec_mode
, v
);
17847 /* Generate code for floating point ABS or NEG. */
17850 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
17853 rtx mask
, set
, dst
, src
;
17854 bool use_sse
= false;
17855 bool vector_mode
= VECTOR_MODE_P (mode
);
17856 enum machine_mode vmode
= mode
;
17860 else if (mode
== TFmode
)
17862 else if (TARGET_SSE_MATH
)
17864 use_sse
= SSE_FLOAT_MODE_P (mode
);
17865 if (mode
== SFmode
)
17867 else if (mode
== DFmode
)
17871 /* NEG and ABS performed with SSE use bitwise mask operations.
17872 Create the appropriate mask now. */
17874 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
17881 set
= gen_rtx_fmt_e (code
, mode
, src
);
17882 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
17889 use
= gen_rtx_USE (VOIDmode
, mask
);
17891 par
= gen_rtvec (2, set
, use
);
17894 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17895 par
= gen_rtvec (3, set
, use
, clob
);
17897 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers were
   split one per line and structural lines are missing.  Code tokens left
   byte-identical; only comments added.  Confirm against i386.c.  */
17903 /* Expand a copysign operation. Special case operand 0 being a constant. */
17906 ix86_expand_copysign (rtx operands
[])
17908 enum machine_mode mode
, vmode
;
17909 rtx dest
, op0
, op1
, mask
, nmask
;
17911 dest
= operands
[0];
17915 mode
= GET_MODE (dest
);
17917 if (mode
== SFmode
)
17919 else if (mode
== DFmode
)
/* Constant-magnitude case: take |op0| (the sign comes from op1) and use
   the *_const copysign patterns with a single sign-bit mask.  */
17924 if (GET_CODE (op0
) == CONST_DOUBLE
)
17926 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
17928 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
17929 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
17931 if (mode
== SFmode
|| mode
== DFmode
)
17933 if (op0
== CONST0_RTX (mode
))
17934 op0
= CONST0_RTX (vmode
);
17937 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
17939 op0
= force_reg (vmode
, v
);
17942 else if (op0
!= CONST0_RTX (mode
))
17943 op0
= force_reg (mode
, op0
);
17945 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17947 if (mode
== SFmode
)
17948 copysign_insn
= gen_copysignsf3_const
;
17949 else if (mode
== DFmode
)
17950 copysign_insn
= gen_copysigndf3_const
;
17952 copysign_insn
= gen_copysigntf3_const
;
17954 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
/* Variable case: needs both the sign-bit mask and its complement, and
   the 6-operand *_var copysign patterns.  */
17958 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
17960 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
17961 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17963 if (mode
== SFmode
)
17964 copysign_insn
= gen_copysignsf3_var
;
17965 else if (mode
== DFmode
)
17966 copysign_insn
= gen_copysigndf3_var
;
17968 copysign_insn
= gen_copysigntf3_var
;
17970 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line, structural lines missing.  Code tokens left byte-identical;
   only comments added.  Confirm against i386.c.  */
17974 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17975 be a constant, and so has already been expanded into a vector constant. */
17978 ix86_split_copysign_const (rtx operands
[])
17980 enum machine_mode mode
, vmode
;
17981 rtx dest
, op0
, mask
, x
;
17983 dest
= operands
[0];
17985 mask
= operands
[3];
17987 mode
= GET_MODE (dest
);
17988 vmode
= GET_MODE (mask
);
/* dest &= sign-bit mask (keeps the sign of op1 already in dest), then
   dest |= |op0| unless the magnitude constant is zero.  */
17990 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
17991 x
= gen_rtx_AND (vmode
, dest
, mask
);
17992 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17994 if (op0
!= CONST0_RTX (vmode
))
17996 x
= gen_rtx_IOR (vmode
, dest
, op0
);
17997 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line, structural lines missing.  Code tokens left byte-identical;
   only comments added.  Confirm against i386.c.  */
18001 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18002 so we have to do two masks. */
18005 ix86_split_copysign_var (rtx operands
[])
18007 enum machine_mode mode
, vmode
;
18008 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18010 dest
= operands
[0];
18011 scratch
= operands
[1];
18014 nmask
= operands
[4];
18015 mask
= operands
[5];
18017 mode
= GET_MODE (dest
);
18018 vmode
= GET_MODE (mask
);
18020 if (rtx_equal_p (op0
, op1
))
18022 /* Shouldn't happen often (it's useless, obviously), but when it does
18023 we'd generate incorrect code if we continue below. */
18024 emit_move_insn (dest
, op0
);
/* The remaining paths correspond to the insn pattern's register
   alternatives: which of dest/scratch aliases mask/nmask/op1 decides
   the AND/ANDN ordering below (see the alternative N comments).  */
18028 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18030 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18032 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18033 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18036 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18037 x
= gen_rtx_NOT (vmode
, dest
);
18038 x
= gen_rtx_AND (vmode
, x
, op0
);
18039 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18043 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18045 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18047 else /* alternative 2,4 */
18049 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18050 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18051 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18053 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18055 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18057 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18058 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18060 else /* alternative 3,4 */
18062 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18064 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18065 x
= gen_rtx_AND (vmode
, dest
, op0
);
18067 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Combine magnitude and sign: dest |= scratch.  */
18070 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18071 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line, structural lines (braces, case labels) missing.  Code tokens
   left byte-identical; only comments added.  Confirm against i386.c.  */
18074 /* Return TRUE or FALSE depending on whether the first SET in INSN
18075 has source and destination with matching CC modes, and that the
18076 CC mode is at least as constrained as REQ_MODE. */
18079 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18082 enum machine_mode set_mode
;
18084 set
= PATTERN (insn
);
18085 if (GET_CODE (set
) == PARALLEL
)
18086 set
= XVECEXP (set
, 0, 0);
18087 gcc_assert (GET_CODE (set
) == SET
);
18088 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18090 set_mode
= GET_MODE (SET_DEST (set
));
/* The following checks appear to be the arms of a switch on set_mode,
   relaxing SET_MODE against REQ_MODE — surrounding case labels were lost
   in extraction.  */
18094 if (req_mode
!= CCNOmode
18095 && (req_mode
!= CCmode
18096 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18100 if (req_mode
== CCGCmode
)
18104 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18108 if (req_mode
== CCZmode
)
18118 if (set_mode
!= req_mode
)
18123 gcc_unreachable ();
18126 return GET_MODE (SET_SRC (set
)) == set_mode
;
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line, structural lines missing.  Code tokens left byte-identical;
   only comments added.  Confirm against i386.c.  */
18129 /* Generate insn patterns to do an integer compare of OPERANDS. */
18132 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18134 enum machine_mode cmpmode
;
18137 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18138 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18140 /* This is very simple, but making the interface the same as in the
18141 FP case makes the rest of the code easier. */
18142 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18143 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18145 /* Return the test that should be put into the flags user, i.e.
18146 the bcc, scc, or cmov instruction. */
18147 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
/* NOTE(review): fragmentary extraction — return-type line and braces are
   missing.  Code tokens left byte-identical; confirm against i386.c.  */
18150 /* Figure out whether to use ordered or unordered fp comparisons.
18151 Return the appropriate mode to use. */
18154 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18156 /* ??? In order to make all comparisons reversible, we do all comparisons
18157 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18158 all forms trapping and nontrapping comparisons, we can make inequality
18159 comparisons trapping again, since it results in better code when using
18160 FCOM based compares. */
18161 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line; the switch statement's framing and several return
   statements were dropped.  Code tokens left byte-identical; only comments
   added.  Confirm against i386.c.  */
/* Select the CC mode for a comparison CODE of OP0 against OP1; FP modes
   defer to ix86_fp_compare_mode, integer codes pick the least-constrained
   flags mode that the condition needs (see the per-case comments).  */
18165 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18167 enum machine_mode mode
= GET_MODE (op0
);
18169 if (SCALAR_FLOAT_MODE_P (mode
))
18171 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18172 return ix86_fp_compare_mode (code
);
18177 /* Only zero flag is needed. */
18178 case EQ
: /* ZF=0 */
18179 case NE
: /* ZF!=0 */
18181 /* Codes needing carry flag. */
18182 case GEU
: /* CF=0 */
18183 case LTU
: /* CF=1 */
18184 /* Detect overflow checks. They need just the carry flag. */
18185 if (GET_CODE (op0
) == PLUS
18186 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18190 case GTU
: /* CF=0 & ZF=0 */
18191 case LEU
: /* CF=1 | ZF=1 */
18192 /* Detect overflow checks. They need just the carry flag. */
18193 if (GET_CODE (op0
) == MINUS
18194 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18198 /* Codes possibly doable only with sign flag when
18199 comparing against zero. */
18200 case GE
: /* SF=OF or SF=0 */
18201 case LT
: /* SF<>OF or SF=1 */
18202 if (op1
== const0_rtx
)
18205 /* For other cases Carry flag is not required. */
18207 /* Codes doable only with sign flag when comparing
18208 against zero, but we miss jump instruction for it
18209 so we need to use relational tests against overflow
18210 that thus needs to be zero. */
18211 case GT
: /* ZF=0 & SF=OF */
18212 case LE
: /* ZF=1 | SF<>OF */
18213 if (op1
== const0_rtx
)
18217 /* strcmp pattern do (use flags) and combine may ask us for proper
18222 gcc_unreachable ();
18226 /* Return the fixed registers used for condition codes. */
/* NOTE(review): only the signature survived extraction — the body
   (which presumably stores FLAGS_REG/FPSR_REG into *P1/*P2) is missing;
   restore it from the original i386.c.  */
18229 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
/* NOTE(review): fragmentary extraction — return statements and switch
   framing dropped.  Code tokens left byte-identical; confirm against
   i386.c.  */
18236 /* If two condition code modes are compatible, return a condition code
18237 mode which is compatible with both. Otherwise, return
18240 static enum machine_mode
18241 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18246 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18249 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18250 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18253 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18255 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18261 gcc_unreachable ();
18291 /* These are only compatible with themselves, which we already
/* NOTE(review): fragmentary extraction — switch framing and braces were
   dropped.  Code tokens left byte-identical; confirm against i386.c.  */
18298 /* Return a comparison we can do and that it is equivalent to
18299 swap_condition (code) apart possibly from orderedness.
18300 But, never change orderedness if TARGET_IEEE_FP, returning
18301 UNKNOWN in that case if necessary. */
18303 static enum rtx_code
18304 ix86_fp_swap_condition (enum rtx_code code
)
18308 case GT
: /* GTU - CF=0 & ZF=0 */
18309 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18310 case GE
: /* GEU - CF=0 */
18311 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18312 case UNLT
: /* LTU - CF=1 */
18313 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18314 case UNLE
: /* LEU - CF=1 | ZF=1 */
18315 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18317 return swap_condition (code
);
/* NOTE(review): fragmentary extraction — the switch framing, several case
   labels and the IX86_FPCMP_ARITH return are missing.  Code tokens left
   byte-identical; confirm against i386.c.  */
18321 /* Return cost of comparison CODE using the best strategy for performance.
18322 All following functions do use number of instructions as a cost metrics.
18323 In future this should be tweaked to compute bytes for optimize_size and
18324 take into account performance of various instructions on various CPUs. */
18327 ix86_fp_comparison_cost (enum rtx_code code
)
18331 /* The cost of code using bit-twiddling on %ah. */
18348 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18352 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18355 gcc_unreachable ();
18358 switch (ix86_fp_comparison_strategy (code
))
18360 case IX86_FPCMP_COMI
:
18361 return arith_cost
> 4 ? 3 : 2;
18362 case IX86_FPCMP_SAHF
:
18363 return arith_cost
> 4 ? 4 : 3;
/* NOTE(review): fragmentary extraction — the TARGET_CMOVE guard for the
   COMI case appears to be missing.  Code tokens left byte-identical;
   confirm against i386.c.  */
18369 /* Return strategy to use for floating-point. We assume that fcomi is always
18370 preferrable where available, since that is also true when looking at size
18371 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18373 enum ix86_fpcmp_strategy
18374 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18376 /* Do fcomi/sahf based test when profitable. */
18379 return IX86_FPCMP_COMI
;
18381 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18382 return IX86_FPCMP_SAHF
;
18384 return IX86_FPCMP_ARITH
;
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line; braces, the final return and some conditions are missing.
   Code tokens left byte-identical; only comments added.  Confirm against
   i386.c.  */
18387 /* Swap, force into registers, or otherwise massage the two operands
18388 to a fp comparison. The operands are updated in place; the new
18389 comparison code is returned. */
18391 static enum rtx_code
18392 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18394 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18395 rtx op0
= *pop0
, op1
= *pop1
;
18396 enum machine_mode op_mode
= GET_MODE (op0
);
18397 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18399 /* All of the unordered compare instructions only work on registers.
18400 The same is true of the fcomi compare instructions. The XFmode
18401 compare instructions require registers except when comparing
18402 against zero or when converting operand 1 from fixed point to
18406 && (fpcmp_mode
== CCFPUmode
18407 || (op_mode
== XFmode
18408 && ! (standard_80387_constant_p (op0
) == 1
18409 || standard_80387_constant_p (op1
) == 1)
18410 && GET_CODE (op1
) != FLOAT
)
18411 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18413 op0
= force_reg (op_mode
, op0
);
18414 op1
= force_reg (op_mode
, op1
);
18418 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18419 things around if they appear profitable, otherwise force op0
18420 into a register. */
18422 if (standard_80387_constant_p (op0
) == 0
18424 && ! (standard_80387_constant_p (op1
) == 0
18427 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18428 if (new_code
!= UNKNOWN
)
18431 tmp
= op0
, op0
= op1
, op1
= tmp
;
18437 op0
= force_reg (op_mode
, op0
);
18439 if (CONSTANT_P (op1
))
18441 int tmp
= standard_80387_constant_p (op1
);
18443 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18447 op1
= force_reg (op_mode
, op1
);
18450 op1
= force_reg (op_mode
, op1
);
18454 /* Try to rearrange the comparison to make it cheaper. */
18455 if (ix86_fp_comparison_cost (code
)
18456 > ix86_fp_comparison_cost (swap_condition (code
))
18457 && (REG_P (op1
) || can_create_pseudo_p ()))
18460 tmp
= op0
, op0
= op1
, op1
= tmp
;
18461 code
= swap_condition (code
);
18463 op0
= force_reg (op_mode
, op0
);
18471 /* Convert comparison codes we use to represent FP comparison to integer
18472 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): only the signature survived extraction — the switch body
   mapping FP comparison codes to integer codes is missing; restore it from
   the original i386.c.  */
18476 ix86_fp_compare_code_to_integer (enum rtx_code code
)
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line; braces, case labels of the big CODE switch and several
   statements are missing.  Code tokens left byte-identical; only comments
   added.  Confirm against i386.c.  */
18505 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18508 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18510 enum machine_mode fpcmp_mode
, intcmp_mode
;
18513 fpcmp_mode
= ix86_fp_compare_mode (code
);
18514 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18516 /* Do fcomi/sahf based test when profitable. */
18517 switch (ix86_fp_comparison_strategy (code
))
18519 case IX86_FPCMP_COMI
:
18520 intcmp_mode
= fpcmp_mode
;
18521 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18522 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18527 case IX86_FPCMP_SAHF
:
18528 intcmp_mode
= fpcmp_mode
;
18529 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18530 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18534 scratch
= gen_reg_rtx (HImode
);
18535 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18536 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18539 case IX86_FPCMP_ARITH
:
18540 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18541 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18542 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18544 scratch
= gen_reg_rtx (HImode
);
18545 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18547 /* In the unordered case, we have to check C2 for NaN's, which
18548 doesn't happen to work out to anything nice combination-wise.
18549 So do some bit twiddling on the value we've got in AH to come
18550 up with an appropriate set of condition codes. */
18552 intcmp_mode
= CCNOmode
;
/* The blocks below appear to be arms of a switch on CODE (GT/LT/GE/LE/
   EQ/NE and unordered variants); the case labels were lost in
   extraction.  The 0x45/0x44/0x40/0x05/0x04 masks select C0/C2/C3 bits
   of the FP status word copied into AH.  */
18557 if (code
== GT
|| !TARGET_IEEE_FP
)
18559 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18564 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18565 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18566 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18567 intcmp_mode
= CCmode
;
18573 if (code
== LT
&& TARGET_IEEE_FP
)
18575 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18576 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18577 intcmp_mode
= CCmode
;
18582 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18588 if (code
== GE
|| !TARGET_IEEE_FP
)
18590 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18595 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18596 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18602 if (code
== LE
&& TARGET_IEEE_FP
)
18604 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18605 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18606 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18607 intcmp_mode
= CCmode
;
18612 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18618 if (code
== EQ
&& TARGET_IEEE_FP
)
18620 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18621 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18622 intcmp_mode
= CCmode
;
18627 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18633 if (code
== NE
&& TARGET_IEEE_FP
)
18635 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18636 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18642 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18648 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18652 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18657 gcc_unreachable ();
18665 /* Return the test that should be put into the flags user, i.e.
18666 the bcc, scc, or cmov instruction. */
18667 return gen_rtx_fmt_ee (code
, VOIDmode
,
18668 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
/* NOTE(review): fragmentary extraction — return type, braces and the
   final return are missing.  Code tokens left byte-identical; confirm
   against i386.c.  */
/* Dispatch a comparison: CC-mode operands are used directly, scalar FP
   goes through ix86_expand_fp_compare, everything else through
   ix86_expand_int_compare.  */
18673 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18677 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18678 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18680 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18682 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18683 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18686 ret
= ix86_expand_int_compare (code
, op0
, op1
);
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line; the mode switch framing, braces and several statements are
   missing.  Code tokens left byte-identical; only comments added.  Confirm
   against i386.c.  */
/* Emit a conditional branch to LABEL for comparison CODE of OP0/OP1;
   double-word modes are split into multiple compare+branch sequences.  */
18692 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18694 enum machine_mode mode
= GET_MODE (op0
);
18706 tmp
= ix86_expand_compare (code
, op0
, op1
);
18707 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18708 gen_rtx_LABEL_REF (VOIDmode
, label
),
18710 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18717 /* Expand DImode branch into multiple compare+branch. */
18719 rtx lo
[2], hi
[2], label2
;
18720 enum rtx_code code1
, code2
, code3
;
18721 enum machine_mode submode
;
18723 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18725 tmp
= op0
, op0
= op1
, op1
= tmp
;
18726 code
= swap_condition (code
);
18729 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18730 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18732 submode
= mode
== DImode
? SImode
: DImode
;
18734 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18735 avoid two branches. This costs one extra insn, so disable when
18736 optimizing for size. */
18738 if ((code
== EQ
|| code
== NE
)
18739 && (!optimize_insn_for_size_p ()
18740 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18745 if (hi
[1] != const0_rtx
)
18746 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18747 NULL_RTX
, 0, OPTAB_WIDEN
);
18750 if (lo
[1] != const0_rtx
)
18751 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18752 NULL_RTX
, 0, OPTAB_WIDEN
);
18754 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18755 NULL_RTX
, 0, OPTAB_WIDEN
);
18757 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18761 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18762 op1 is a constant and the low word is zero, then we can just
18763 examine the high word. Similarly for low word -1 and
18764 less-or-equal-than or greater-than. */
18766 if (CONST_INT_P (hi
[1]))
18769 case LT
: case LTU
: case GE
: case GEU
:
18770 if (lo
[1] == const0_rtx
)
18772 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18776 case LE
: case LEU
: case GT
: case GTU
:
18777 if (lo
[1] == constm1_rtx
)
18779 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18787 /* Otherwise, we need two or three jumps. */
18789 label2
= gen_label_rtx ();
18792 code2
= swap_condition (code
);
18793 code3
= unsigned_condition (code
);
18797 case LT
: case GT
: case LTU
: case GTU
:
18800 case LE
: code1
= LT
; code2
= GT
; break;
18801 case GE
: code1
= GT
; code2
= LT
; break;
18802 case LEU
: code1
= LTU
; code2
= GTU
; break;
18803 case GEU
: code1
= GTU
; code2
= LTU
; break;
18805 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
18806 case NE
: code2
= UNKNOWN
; break;
18809 gcc_unreachable ();
18814 * if (hi(a) < hi(b)) goto true;
18815 * if (hi(a) > hi(b)) goto false;
18816 * if (lo(a) < lo(b)) goto true;
18820 if (code1
!= UNKNOWN
)
18821 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
18822 if (code2
!= UNKNOWN
)
18823 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
18825 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
18827 if (code2
!= UNKNOWN
)
18828 emit_label (label2
);
18833 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
/* NOTE(review): fragmentary extraction — braces and some statements are
   missing.  Code tokens left byte-identical; confirm against i386.c.  */
18838 /* Split branch based on floating point condition. */
18840 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
18841 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
/* If the "true" target is not the fallthrough, reverse the condition so
   target2 becomes pc_rtx.  */
18846 if (target2
!= pc_rtx
)
18849 code
= reverse_condition_maybe_unordered (code
);
18854 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
18857 /* Remove pushed operand from stack. */
18859 ix86_free_from_memory (GET_MODE (pushed
));
18861 i
= emit_jump_insn (gen_rtx_SET
18863 gen_rtx_IF_THEN_ELSE (VOIDmode
,
18864 condition
, target1
, target2
)));
18865 if (split_branch_probability
>= 0)
18866 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
/* NOTE(review): fragmentary extraction — return type and braces are
   missing.  Code tokens left byte-identical; confirm against i386.c.  */
/* Emit a setcc: expand the comparison, retag the result rtx as QImode
   and store it into DEST (asserted to be QImode).  */
18870 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
18874 gcc_assert (GET_MODE (dest
) == QImode
);
18876 ret
= ix86_expand_compare (code
, op0
, op1
);
18877 PUT_MODE (ret
, QImode
);
18878 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line; braces, case labels of the integer-code switch and several
   return statements are missing.  Code tokens left byte-identical; only
   comments added.  Confirm against i386.c.  */
18881 /* Expand comparison setting or clearing carry flag. Return true when
18882 successful and set pop for the operation. */
18884 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
18886 enum machine_mode mode
=
18887 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
18889 /* Do not handle double-mode compares that go through special path. */
18890 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
18893 if (SCALAR_FLOAT_MODE_P (mode
))
18895 rtx compare_op
, compare_seq
;
18897 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18899 /* Shortcut: following common codes never translate
18900 into carry flag compares. */
18901 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
18902 || code
== ORDERED
|| code
== UNORDERED
)
18905 /* These comparisons require zero flag; swap operands so they won't. */
18906 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
18907 && !TARGET_IEEE_FP
)
18912 code
= swap_condition (code
);
18915 /* Try to expand the comparison and verify that we end up with
18916 carry flag based comparison. This fails to be true only when
18917 we decide to expand comparison using arithmetic that is not
18918 too common scenario. */
18920 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18921 compare_seq
= get_insns ();
18924 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
18925 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
18926 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
18928 code
= GET_CODE (compare_op
);
18930 if (code
!= LTU
&& code
!= GEU
)
18933 emit_insn (compare_seq
);
18938 if (!INTEGRAL_MODE_P (mode
))
/* The transformations below appear to be arms of a switch on CODE
   rewriting EQ/NE/GT(U)/LE(U)/LT/GE into carry-flag-only LTU/GEU
   forms; the case labels were lost in extraction.  */
18947 /* Convert a==0 into (unsigned)a<1. */
18950 if (op1
!= const0_rtx
)
18953 code
= (code
== EQ
? LTU
: GEU
);
18956 /* Convert a>b into b<a or a>=b-1. */
18959 if (CONST_INT_P (op1
))
18961 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
18962 /* Bail out on overflow. We still can swap operands but that
18963 would force loading of the constant into register. */
18964 if (op1
== const0_rtx
18965 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
18967 code
= (code
== GTU
? GEU
: LTU
);
18974 code
= (code
== GTU
? LTU
: GEU
);
18978 /* Convert a>=0 into (unsigned)a<0x80000000. */
18981 if (mode
== DImode
|| op1
!= const0_rtx
)
18983 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18984 code
= (code
== LT
? GEU
: LTU
);
18988 if (mode
== DImode
|| op1
!= constm1_rtx
)
18990 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18991 code
= (code
== LE
? GEU
: LTU
);
18997 /* Swapping operands may cause constant to appear as first operand. */
18998 if (!nonimmediate_operand (op0
, VOIDmode
))
19000 if (!can_create_pseudo_p ())
19002 op0
= force_reg (mode
, op0
);
19004 *pop
= ix86_expand_compare (code
, op0
, op1
);
19005 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
/* NOTE(review): fragmentary extraction of GCC's i386.c — identifiers split
   one per line; braces, many conditions and whole statements are missing.
   Code tokens left byte-identical; only comments added.  This is the large
   integer conditional-move expander; confirm every reconstructed detail
   against the original i386.c.  */
19010 ix86_expand_int_movcc (rtx operands
[])
19012 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19013 rtx compare_seq
, compare_op
;
19014 enum machine_mode mode
= GET_MODE (operands
[0]);
19015 bool sign_bit_compare_p
= false;
19016 rtx op0
= XEXP (operands
[1], 0);
19017 rtx op1
= XEXP (operands
[1], 1);
19019 if (GET_MODE (op0
) == TImode
19020 || (GET_MODE (op0
) == DImode
19025 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19026 compare_seq
= get_insns ();
19029 compare_code
= GET_CODE (compare_op
);
19031 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19032 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19033 sign_bit_compare_p
= true;
19035 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19036 HImode insns, we'd be swallowed in word prefix ops. */
19038 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19039 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19040 && CONST_INT_P (operands
[2])
19041 && CONST_INT_P (operands
[3]))
19043 rtx out
= operands
[0];
19044 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19045 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19046 HOST_WIDE_INT diff
;
19049 /* Sign bit compares are better done using shifts than we do by using
19051 if (sign_bit_compare_p
19052 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19054 /* Detect overlap between destination and compare sources. */
19057 if (!sign_bit_compare_p
)
19060 bool fpcmp
= false;
19062 compare_code
= GET_CODE (compare_op
);
19064 flags
= XEXP (compare_op
, 0);
19066 if (GET_MODE (flags
) == CCFPmode
19067 || GET_MODE (flags
) == CCFPUmode
)
19071 = ix86_fp_compare_code_to_integer (compare_code
);
19074 /* To simplify rest of code, restrict to the GEU case. */
19075 if (compare_code
== LTU
)
19077 HOST_WIDE_INT tmp
= ct
;
19080 compare_code
= reverse_condition (compare_code
);
19081 code
= reverse_condition (code
);
19086 PUT_CODE (compare_op
,
19087 reverse_condition_maybe_unordered
19088 (GET_CODE (compare_op
)));
19090 PUT_CODE (compare_op
,
19091 reverse_condition (GET_CODE (compare_op
)));
19095 if (reg_overlap_mentioned_p (out
, op0
)
19096 || reg_overlap_mentioned_p (out
, op1
))
19097 tmp
= gen_reg_rtx (mode
);
/* Materialize 0/-1 from the carry flag (sbb idiom).  */
19099 if (mode
== DImode
)
19100 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19102 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19103 flags
, compare_op
));
19107 if (code
== GT
|| code
== GE
)
19108 code
= reverse_condition (code
);
19111 HOST_WIDE_INT tmp
= ct
;
19116 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19129 tmp
= expand_simple_binop (mode
, PLUS
,
19131 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19142 tmp
= expand_simple_binop (mode
, IOR
,
19144 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19146 else if (diff
== -1 && ct
)
19156 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19158 tmp
= expand_simple_binop (mode
, PLUS
,
19159 copy_rtx (tmp
), GEN_INT (cf
),
19160 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19168 * andl cf - ct, dest
19178 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19181 tmp
= expand_simple_binop (mode
, AND
,
19183 gen_int_mode (cf
- ct
, mode
),
19184 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19186 tmp
= expand_simple_binop (mode
, PLUS
,
19187 copy_rtx (tmp
), GEN_INT (ct
),
19188 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19191 if (!rtx_equal_p (tmp
, out
))
19192 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19199 enum machine_mode cmp_mode
= GET_MODE (op0
);
19202 tmp
= ct
, ct
= cf
, cf
= tmp
;
19205 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19207 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19209 /* We may be reversing unordered compare to normal compare, that
19210 is not valid in general (we may convert non-trapping condition
19211 to trapping one), however on i386 we currently emit all
19212 comparisons unordered. */
19213 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19214 code
= reverse_condition_maybe_unordered (code
);
19218 compare_code
= reverse_condition (compare_code
);
19219 code
= reverse_condition (code
);
19223 compare_code
= UNKNOWN
;
19224 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19225 && CONST_INT_P (op1
))
19227 if (op1
== const0_rtx
19228 && (code
== LT
|| code
== GE
))
19229 compare_code
= code
;
19230 else if (op1
== constm1_rtx
)
19234 else if (code
== GT
)
19239 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19240 if (compare_code
!= UNKNOWN
19241 && GET_MODE (op0
) == GET_MODE (out
)
19242 && (cf
== -1 || ct
== -1))
19244 /* If lea code below could be used, only optimize
19245 if it results in a 2 insn sequence. */
19247 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19248 || diff
== 3 || diff
== 5 || diff
== 9)
19249 || (compare_code
== LT
&& ct
== -1)
19250 || (compare_code
== GE
&& cf
== -1))
19253 * notl op1 (if necessary)
19261 code
= reverse_condition (code
);
19264 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19266 out
= expand_simple_binop (mode
, IOR
,
19268 out
, 1, OPTAB_DIRECT
);
19269 if (out
!= operands
[0])
19270 emit_move_insn (operands
[0], out
);
19277 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19278 || diff
== 3 || diff
== 5 || diff
== 9)
19279 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19281 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19287 * lea cf(dest*(ct-cf)),dest
19291 * This also catches the degenerate setcc-only case.
19297 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19300 /* On x86_64 the lea instruction operates on Pmode, so we need
19301 to get arithmetics done in proper mode to match. */
19303 tmp
= copy_rtx (out
);
19307 out1
= copy_rtx (out
);
19308 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19312 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19318 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19321 if (!rtx_equal_p (tmp
, out
))
19324 out
= force_operand (tmp
, copy_rtx (out
));
19326 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19328 if (!rtx_equal_p (out
, operands
[0]))
19329 emit_move_insn (operands
[0], copy_rtx (out
));
19335 * General case: Jumpful:
19336 * xorl dest,dest cmpl op1, op2
19337 * cmpl op1, op2 movl ct, dest
19338 * setcc dest jcc 1f
19339 * decl dest movl cf, dest
19340 * andl (cf-ct),dest 1:
19343 * Size 20. Size 14.
19345 * This is reasonably steep, but branch mispredict costs are
19346 * high on modern cpus, so consider failing only if optimizing
19350 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19351 && BRANCH_COST (optimize_insn_for_speed_p (),
19356 enum machine_mode cmp_mode
= GET_MODE (op0
);
19361 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19363 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19365 /* We may be reversing unordered compare to normal compare,
19366 that is not valid in general (we may convert non-trapping
19367 condition to trapping one), however on i386 we currently
19368 emit all comparisons unordered. */
19369 code
= reverse_condition_maybe_unordered (code
);
19373 code
= reverse_condition (code
);
19374 if (compare_code
!= UNKNOWN
)
19375 compare_code
= reverse_condition (compare_code
);
19379 if (compare_code
!= UNKNOWN
)
19381 /* notl op1 (if needed)
19386 For x < 0 (resp. x <= -1) there will be no notl,
19387 so if possible swap the constants to get rid of the
19389 True/false will be -1/0 while code below (store flag
19390 followed by decrement) is 0/-1, so the constants need
19391 to be exchanged once more. */
19393 if (compare_code
== GE
|| !cf
)
19395 code
= reverse_condition (code
);
19400 HOST_WIDE_INT tmp
= cf
;
19405 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19409 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19411 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19413 copy_rtx (out
), 1, OPTAB_DIRECT
);
19416 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19417 gen_int_mode (cf
- ct
, mode
),
19418 copy_rtx (out
), 1, OPTAB_DIRECT
);
19420 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19421 copy_rtx (out
), 1, OPTAB_DIRECT
);
19422 if (!rtx_equal_p (out
, operands
[0]))
19423 emit_move_insn (operands
[0], copy_rtx (out
));
19429 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19431 /* Try a few things more with specific constants and a variable. */
19434 rtx var
, orig_out
, out
, tmp
;
19436 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19439 /* If one of the two operands is an interesting constant, load a
19440 constant with the above and mask it in with a logical operation. */
19442 if (CONST_INT_P (operands
[2]))
19445 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19446 operands
[3] = constm1_rtx
, op
= and_optab
;
19447 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19448 operands
[3] = const0_rtx
, op
= ior_optab
;
19452 else if (CONST_INT_P (operands
[3]))
19455 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19456 operands
[2] = constm1_rtx
, op
= and_optab
;
19457 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19458 operands
[2] = const0_rtx
, op
= ior_optab
;
19465 orig_out
= operands
[0];
19466 tmp
= gen_reg_rtx (mode
);
19469 /* Recurse to get the constant loaded. */
19470 if (ix86_expand_int_movcc (operands
) == 0)
19473 /* Mask in the interesting variable. */
19474 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19476 if (!rtx_equal_p (out
, orig_out
))
19477 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19483 * For comparison with above,
19493 if (! nonimmediate_operand (operands
[2], mode
))
19494 operands
[2] = force_reg (mode
, operands
[2]);
19495 if (! nonimmediate_operand (operands
[3], mode
))
19496 operands
[3] = force_reg (mode
, operands
[3]);
19498 if (! register_operand (operands
[2], VOIDmode
)
19500 || ! register_operand (operands
[3], VOIDmode
)))
19501 operands
[2] = force_reg (mode
, operands
[2]);
19504 && ! register_operand (operands
[3], VOIDmode
))
19505 operands
[3] = force_reg (mode
, operands
[3]);
/* Final cmove emission: (set operands[0] (if_then_else ...)).  */
19507 emit_insn (compare_seq
);
19508 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19509 gen_rtx_IF_THEN_ELSE (mode
,
19510 compare_op
, operands
[2],
19515 /* Swap, force into registers, or otherwise massage the two operands
19516 to an sse comparison with a mask result. Thus we differ a bit from
19517 ix86_prepare_fp_compare_args which expects to produce a flags result.
19519 The DEST operand exists to help determine whether to commute commutative
19520 operators. The POP0/POP1 operands are updated in place. The new
19521 comparison code is returned, or UNKNOWN if not implementable. */
19523 static enum rtx_code
19524 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19525 rtx
*pop0
, rtx
*pop1
)
19533 /* AVX supports all the needed comparisons. */
19536 /* We have no LTGT as an operator. We could implement it with
19537 NE & ORDERED, but this requires an extra temporary. It's
19538 not clear that it's worth it. */
19545 /* These are supported directly. */
19552 /* AVX has 3 operand comparisons, no need to swap anything. */
19555 /* For commutative operators, try to canonicalize the destination
19556 operand to be first in the comparison - this helps reload to
19557 avoid extra moves. */
19558 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19566 /* These are not supported directly before AVX, and furthermore
19567 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19568 comparison operands to transform into something that is
19573 code
= swap_condition (code
);
19577 gcc_unreachable ();
19583 /* Detect conditional moves that exactly match min/max operational
19584 semantics. Note that this is IEEE safe, as long as we don't
19585 interchange the operands.
19587 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19588 and TRUE if the operation is successful and instructions are emitted. */
19591 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19592 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19594 enum machine_mode mode
;
19600 else if (code
== UNGE
)
19603 if_true
= if_false
;
19609 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19611 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19616 mode
= GET_MODE (dest
);
19618 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19619 but MODE may be a vector mode and thus not appropriate. */
19620 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19622 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19625 if_true
= force_reg (mode
, if_true
);
19626 v
= gen_rtvec (2, if_true
, if_false
);
19627 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19631 code
= is_min
? SMIN
: SMAX
;
19632 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19635 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19639 /* Expand an sse vector comparison. Return the register with the result. */
19642 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19643 rtx op_true
, rtx op_false
)
19645 enum machine_mode mode
= GET_MODE (dest
);
19646 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19649 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19650 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19651 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19654 || reg_overlap_mentioned_p (dest
, op_true
)
19655 || reg_overlap_mentioned_p (dest
, op_false
))
19656 dest
= gen_reg_rtx (mode
);
19658 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19659 if (cmp_mode
!= mode
)
19661 x
= force_reg (cmp_mode
, x
);
19662 convert_move (dest
, x
, false);
19665 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19670 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19671 operations. This is used for both scalar and vector conditional moves. */
19674 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19676 enum machine_mode mode
= GET_MODE (dest
);
19679 if (vector_all_ones_operand (op_true
, mode
)
19680 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19682 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19684 else if (op_false
== CONST0_RTX (mode
))
19686 op_true
= force_reg (mode
, op_true
);
19687 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19688 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19690 else if (op_true
== CONST0_RTX (mode
))
19692 op_false
= force_reg (mode
, op_false
);
19693 x
= gen_rtx_NOT (mode
, cmp
);
19694 x
= gen_rtx_AND (mode
, x
, op_false
);
19695 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19697 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19699 op_false
= force_reg (mode
, op_false
);
19700 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19701 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19703 else if (TARGET_XOP
)
19705 op_true
= force_reg (mode
, op_true
);
19707 if (!nonimmediate_operand (op_false
, mode
))
19708 op_false
= force_reg (mode
, op_false
);
19710 emit_insn (gen_rtx_SET (mode
, dest
,
19711 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19717 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19719 if (!nonimmediate_operand (op_true
, mode
))
19720 op_true
= force_reg (mode
, op_true
);
19722 op_false
= force_reg (mode
, op_false
);
19728 gen
= gen_sse4_1_blendvps
;
19732 gen
= gen_sse4_1_blendvpd
;
19740 gen
= gen_sse4_1_pblendvb
;
19741 dest
= gen_lowpart (V16QImode
, dest
);
19742 op_false
= gen_lowpart (V16QImode
, op_false
);
19743 op_true
= gen_lowpart (V16QImode
, op_true
);
19744 cmp
= gen_lowpart (V16QImode
, cmp
);
19749 gen
= gen_avx_blendvps256
;
19753 gen
= gen_avx_blendvpd256
;
19761 gen
= gen_avx2_pblendvb
;
19762 dest
= gen_lowpart (V32QImode
, dest
);
19763 op_false
= gen_lowpart (V32QImode
, op_false
);
19764 op_true
= gen_lowpart (V32QImode
, op_true
);
19765 cmp
= gen_lowpart (V32QImode
, cmp
);
19773 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
19776 op_true
= force_reg (mode
, op_true
);
19778 t2
= gen_reg_rtx (mode
);
19780 t3
= gen_reg_rtx (mode
);
19784 x
= gen_rtx_AND (mode
, op_true
, cmp
);
19785 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
19787 x
= gen_rtx_NOT (mode
, cmp
);
19788 x
= gen_rtx_AND (mode
, x
, op_false
);
19789 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
19791 x
= gen_rtx_IOR (mode
, t3
, t2
);
19792 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19797 /* Expand a floating-point conditional move. Return true if successful. */
19800 ix86_expand_fp_movcc (rtx operands
[])
19802 enum machine_mode mode
= GET_MODE (operands
[0]);
19803 enum rtx_code code
= GET_CODE (operands
[1]);
19804 rtx tmp
, compare_op
;
19805 rtx op0
= XEXP (operands
[1], 0);
19806 rtx op1
= XEXP (operands
[1], 1);
19808 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
19810 enum machine_mode cmode
;
19812 /* Since we've no cmove for sse registers, don't force bad register
19813 allocation just to gain access to it. Deny movcc when the
19814 comparison mode doesn't match the move mode. */
19815 cmode
= GET_MODE (op0
);
19816 if (cmode
== VOIDmode
)
19817 cmode
= GET_MODE (op1
);
19821 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
19822 if (code
== UNKNOWN
)
19825 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
19826 operands
[2], operands
[3]))
19829 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
19830 operands
[2], operands
[3]);
19831 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
19835 /* The floating point conditional move instructions don't directly
19836 support conditions resulting from a signed integer comparison. */
19838 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19839 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
19841 tmp
= gen_reg_rtx (QImode
);
19842 ix86_expand_setcc (tmp
, code
, op0
, op1
);
19844 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
19847 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19848 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
19849 operands
[2], operands
[3])));
19854 /* Expand a floating-point vector conditional move; a vcond operation
19855 rather than a movcc operation. */
19858 ix86_expand_fp_vcond (rtx operands
[])
19860 enum rtx_code code
= GET_CODE (operands
[3]);
19863 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
19864 &operands
[4], &operands
[5]);
19865 if (code
== UNKNOWN
)
19868 switch (GET_CODE (operands
[3]))
19871 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
19872 operands
[5], operands
[0], operands
[0]);
19873 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
19874 operands
[5], operands
[1], operands
[2]);
19878 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
19879 operands
[5], operands
[0], operands
[0]);
19880 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
19881 operands
[5], operands
[1], operands
[2]);
19885 gcc_unreachable ();
19887 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
19889 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19893 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
19894 operands
[5], operands
[1], operands
[2]))
19897 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
19898 operands
[1], operands
[2]);
19899 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19903 /* Expand a signed/unsigned integral vector conditional move. */
19906 ix86_expand_int_vcond (rtx operands
[])
19908 enum machine_mode data_mode
= GET_MODE (operands
[0]);
19909 enum machine_mode mode
= GET_MODE (operands
[4]);
19910 enum rtx_code code
= GET_CODE (operands
[3]);
19911 bool negate
= false;
19914 cop0
= operands
[4];
19915 cop1
= operands
[5];
19917 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19918 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
19919 if ((code
== LT
|| code
== GE
)
19920 && data_mode
== mode
19921 && cop1
== CONST0_RTX (mode
)
19922 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
19923 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
19924 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
19925 && (GET_MODE_SIZE (data_mode
) == 16
19926 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
19928 rtx negop
= operands
[2 - (code
== LT
)];
19929 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
19930 if (negop
== CONST1_RTX (data_mode
))
19932 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
19933 operands
[0], 1, OPTAB_DIRECT
);
19934 if (res
!= operands
[0])
19935 emit_move_insn (operands
[0], res
);
19938 else if (GET_MODE_INNER (data_mode
) != DImode
19939 && vector_all_ones_operand (negop
, data_mode
))
19941 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
19942 operands
[0], 0, OPTAB_DIRECT
);
19943 if (res
!= operands
[0])
19944 emit_move_insn (operands
[0], res
);
19949 if (!nonimmediate_operand (cop1
, mode
))
19950 cop1
= force_reg (mode
, cop1
);
19951 if (!general_operand (operands
[1], data_mode
))
19952 operands
[1] = force_reg (data_mode
, operands
[1]);
19953 if (!general_operand (operands
[2], data_mode
))
19954 operands
[2] = force_reg (data_mode
, operands
[2]);
19956 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19958 && (mode
== V16QImode
|| mode
== V8HImode
19959 || mode
== V4SImode
|| mode
== V2DImode
))
19963 /* Canonicalize the comparison to EQ, GT, GTU. */
19974 code
= reverse_condition (code
);
19980 code
= reverse_condition (code
);
19986 code
= swap_condition (code
);
19987 x
= cop0
, cop0
= cop1
, cop1
= x
;
19991 gcc_unreachable ();
19994 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19995 if (mode
== V2DImode
)
20000 /* SSE4.1 supports EQ. */
20001 if (!TARGET_SSE4_1
)
20007 /* SSE4.2 supports GT/GTU. */
20008 if (!TARGET_SSE4_2
)
20013 gcc_unreachable ();
20017 /* Unsigned parallel compare is not supported by the hardware.
20018 Play some tricks to turn this into a signed comparison
20022 cop0
= force_reg (mode
, cop0
);
20032 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20036 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20037 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20038 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20039 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20041 gcc_unreachable ();
20043 /* Subtract (-(INT MAX) - 1) from both operands to make
20045 mask
= ix86_build_signbit_mask (mode
, true, false);
20046 t1
= gen_reg_rtx (mode
);
20047 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20049 t2
= gen_reg_rtx (mode
);
20050 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20062 /* Perform a parallel unsigned saturating subtraction. */
20063 x
= gen_reg_rtx (mode
);
20064 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20065 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20068 cop1
= CONST0_RTX (mode
);
20074 gcc_unreachable ();
20079 /* Allow the comparison to be done in one mode, but the movcc to
20080 happen in another mode. */
20081 if (data_mode
== mode
)
20083 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20084 operands
[1+negate
], operands
[2-negate
]);
20088 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20089 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20091 operands
[1+negate
], operands
[2-negate
]);
20092 x
= gen_lowpart (data_mode
, x
);
20095 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20096 operands
[2-negate
]);
20100 /* Expand a variable vector permutation. */
20103 ix86_expand_vec_perm (rtx operands
[])
20105 rtx target
= operands
[0];
20106 rtx op0
= operands
[1];
20107 rtx op1
= operands
[2];
20108 rtx mask
= operands
[3];
20109 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20110 enum machine_mode mode
= GET_MODE (op0
);
20111 enum machine_mode maskmode
= GET_MODE (mask
);
20113 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20115 /* Number of elements in the vector. */
20116 w
= GET_MODE_NUNITS (mode
);
20117 e
= GET_MODE_UNIT_SIZE (mode
);
20118 gcc_assert (w
<= 32);
20122 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20124 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20125 an constant shuffle operand. With a tiny bit of effort we can
20126 use VPERMD instead. A re-interpretation stall for V4DFmode is
20127 unfortunate but there's no avoiding it.
20128 Similarly for V16HImode we don't have instructions for variable
20129 shuffling, while for V32QImode we can use after preparing suitable
20130 masks vpshufb; vpshufb; vpermq; vpor. */
20132 if (mode
== V16HImode
)
20134 maskmode
= mode
= V32QImode
;
20140 maskmode
= mode
= V8SImode
;
20144 t1
= gen_reg_rtx (maskmode
);
20146 /* Replicate the low bits of the V4DImode mask into V8SImode:
20148 t1 = { A A B B C C D D }. */
20149 for (i
= 0; i
< w
/ 2; ++i
)
20150 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20151 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20152 vt
= force_reg (maskmode
, vt
);
20153 mask
= gen_lowpart (maskmode
, mask
);
20154 if (maskmode
== V8SImode
)
20155 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20157 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20159 /* Multiply the shuffle indicies by two. */
20160 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20163 /* Add one to the odd shuffle indicies:
20164 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20165 for (i
= 0; i
< w
/ 2; ++i
)
20167 vec
[i
* 2] = const0_rtx
;
20168 vec
[i
* 2 + 1] = const1_rtx
;
20170 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20171 vt
= force_const_mem (maskmode
, vt
);
20172 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20175 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20176 operands
[3] = mask
= t1
;
20177 target
= gen_lowpart (mode
, target
);
20178 op0
= gen_lowpart (mode
, op0
);
20179 op1
= gen_lowpart (mode
, op1
);
20185 /* The VPERMD and VPERMPS instructions already properly ignore
20186 the high bits of the shuffle elements. No need for us to
20187 perform an AND ourselves. */
20188 if (one_operand_shuffle
)
20189 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20192 t1
= gen_reg_rtx (V8SImode
);
20193 t2
= gen_reg_rtx (V8SImode
);
20194 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20195 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20201 mask
= gen_lowpart (V8SFmode
, mask
);
20202 if (one_operand_shuffle
)
20203 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20206 t1
= gen_reg_rtx (V8SFmode
);
20207 t2
= gen_reg_rtx (V8SFmode
);
20208 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20209 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20215 /* By combining the two 128-bit input vectors into one 256-bit
20216 input vector, we can use VPERMD and VPERMPS for the full
20217 two-operand shuffle. */
20218 t1
= gen_reg_rtx (V8SImode
);
20219 t2
= gen_reg_rtx (V8SImode
);
20220 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20221 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20222 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20223 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20227 t1
= gen_reg_rtx (V8SFmode
);
20228 t2
= gen_reg_rtx (V8SImode
);
20229 mask
= gen_lowpart (V4SImode
, mask
);
20230 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20231 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20232 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20233 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20237 t1
= gen_reg_rtx (V32QImode
);
20238 t2
= gen_reg_rtx (V32QImode
);
20239 t3
= gen_reg_rtx (V32QImode
);
20240 vt2
= GEN_INT (128);
20241 for (i
= 0; i
< 32; i
++)
20243 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20244 vt
= force_reg (V32QImode
, vt
);
20245 for (i
= 0; i
< 32; i
++)
20246 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20247 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20248 vt2
= force_reg (V32QImode
, vt2
);
20249 /* From mask create two adjusted masks, which contain the same
20250 bits as mask in the low 7 bits of each vector element.
20251 The first mask will have the most significant bit clear
20252 if it requests element from the same 128-bit lane
20253 and MSB set if it requests element from the other 128-bit lane.
20254 The second mask will have the opposite values of the MSB,
20255 and additionally will have its 128-bit lanes swapped.
20256 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20257 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20258 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20259 stands for other 12 bytes. */
20260 /* The bit whether element is from the same lane or the other
20261 lane is bit 4, so shift it up by 3 to the MSB position. */
20262 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20263 gen_lowpart (V4DImode
, mask
),
20265 /* Clear MSB bits from the mask just in case it had them set. */
20266 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20267 /* After this t1 will have MSB set for elements from other lane. */
20268 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20269 /* Clear bits other than MSB. */
20270 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20271 /* Or in the lower bits from mask into t3. */
20272 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20273 /* And invert MSB bits in t1, so MSB is set for elements from the same
20275 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20276 /* Swap 128-bit lanes in t3. */
20277 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20278 gen_lowpart (V4DImode
, t3
),
20279 const2_rtx
, GEN_INT (3),
20280 const0_rtx
, const1_rtx
));
20281 /* And or in the lower bits from mask into t1. */
20282 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20283 if (one_operand_shuffle
)
20285 /* Each of these shuffles will put 0s in places where
20286 element from the other 128-bit lane is needed, otherwise
20287 will shuffle in the requested value. */
20288 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20289 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20290 /* For t3 the 128-bit lanes are swapped again. */
20291 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20292 gen_lowpart (V4DImode
, t3
),
20293 const2_rtx
, GEN_INT (3),
20294 const0_rtx
, const1_rtx
));
20295 /* And oring both together leads to the result. */
20296 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20300 t4
= gen_reg_rtx (V32QImode
);
20301 /* Similarly to the above one_operand_shuffle code,
20302 just for repeated twice for each operand. merge_two:
20303 code will merge the two results together. */
20304 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20305 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20306 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20307 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20308 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20309 gen_lowpart (V4DImode
, t4
),
20310 const2_rtx
, GEN_INT (3),
20311 const0_rtx
, const1_rtx
));
20312 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20313 gen_lowpart (V4DImode
, t3
),
20314 const2_rtx
, GEN_INT (3),
20315 const0_rtx
, const1_rtx
));
20316 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20317 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20323 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20330 /* The XOP VPPERM insn supports three inputs. By ignoring the
20331 one_operand_shuffle special case, we avoid creating another
20332 set of constant vectors in memory. */
20333 one_operand_shuffle
= false;
20335 /* mask = mask & {2*w-1, ...} */
20336 vt
= GEN_INT (2*w
- 1);
20340 /* mask = mask & {w-1, ...} */
20341 vt
= GEN_INT (w
- 1);
20344 for (i
= 0; i
< w
; i
++)
20346 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20347 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20348 NULL_RTX
, 0, OPTAB_DIRECT
);
20350 /* For non-QImode operations, convert the word permutation control
20351 into a byte permutation control. */
20352 if (mode
!= V16QImode
)
20354 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20355 GEN_INT (exact_log2 (e
)),
20356 NULL_RTX
, 0, OPTAB_DIRECT
);
20358 /* Convert mask to vector of chars. */
20359 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20361 /* Replicate each of the input bytes into byte positions:
20362 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20363 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20364 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20365 for (i
= 0; i
< 16; ++i
)
20366 vec
[i
] = GEN_INT (i
/e
* e
);
20367 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20368 vt
= force_const_mem (V16QImode
, vt
);
20370 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20372 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20374 /* Convert it into the byte positions by doing
20375 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20376 for (i
= 0; i
< 16; ++i
)
20377 vec
[i
] = GEN_INT (i
% e
);
20378 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20379 vt
= force_const_mem (V16QImode
, vt
);
20380 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20383 /* The actual shuffle operations all operate on V16QImode. */
20384 op0
= gen_lowpart (V16QImode
, op0
);
20385 op1
= gen_lowpart (V16QImode
, op1
);
20386 target
= gen_lowpart (V16QImode
, target
);
20390 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20392 else if (one_operand_shuffle
)
20394 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20401 /* Shuffle the two input vectors independently. */
20402 t1
= gen_reg_rtx (V16QImode
);
20403 t2
= gen_reg_rtx (V16QImode
);
20404 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20405 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20408 /* Then merge them together. The key is whether any given control
20409 element contained a bit set that indicates the second word. */
20410 mask
= operands
[3];
20412 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20414 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20415 more shuffle to convert the V2DI input mask into a V4SI
20416 input mask. At which point the masking that expand_int_vcond
20417 will work as desired. */
20418 rtx t3
= gen_reg_rtx (V4SImode
);
20419 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20420 const0_rtx
, const0_rtx
,
20421 const2_rtx
, const2_rtx
));
20423 maskmode
= V4SImode
;
20427 for (i
= 0; i
< w
; i
++)
20429 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20430 vt
= force_reg (maskmode
, vt
);
20431 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20432 NULL_RTX
, 0, OPTAB_DIRECT
);
20434 xops
[0] = gen_lowpart (mode
, operands
[0]);
20435 xops
[1] = gen_lowpart (mode
, t2
);
20436 xops
[2] = gen_lowpart (mode
, t1
);
20437 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20440 ok
= ix86_expand_int_vcond (xops
);
20445 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20446 true if we should do zero extension, else sign extension. HIGH_P is
20447 true if we want the N/2 high elements, else the low elements. */
20450 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20452 enum machine_mode imode
= GET_MODE (src
);
20457 rtx (*unpack
)(rtx
, rtx
);
20458 rtx (*extract
)(rtx
, rtx
) = NULL
;
20459 enum machine_mode halfmode
= BLKmode
;
20465 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20467 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20468 halfmode
= V16QImode
;
20470 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20474 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20476 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20477 halfmode
= V8HImode
;
20479 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20483 unpack
= gen_avx2_zero_extendv4siv4di2
;
20485 unpack
= gen_avx2_sign_extendv4siv4di2
;
20486 halfmode
= V4SImode
;
20488 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20492 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20494 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20498 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20500 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20504 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20506 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20509 gcc_unreachable ();
20512 if (GET_MODE_SIZE (imode
) == 32)
20514 tmp
= gen_reg_rtx (halfmode
);
20515 emit_insn (extract (tmp
, src
));
20519 /* Shift higher 8 bytes to lower 8 bytes. */
20520 tmp
= gen_reg_rtx (imode
);
20521 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20522 gen_lowpart (V1TImode
, src
),
20528 emit_insn (unpack (dest
, tmp
));
20532 rtx (*unpack
)(rtx
, rtx
, rtx
);
20538 unpack
= gen_vec_interleave_highv16qi
;
20540 unpack
= gen_vec_interleave_lowv16qi
;
20544 unpack
= gen_vec_interleave_highv8hi
;
20546 unpack
= gen_vec_interleave_lowv8hi
;
20550 unpack
= gen_vec_interleave_highv4si
;
20552 unpack
= gen_vec_interleave_lowv4si
;
20555 gcc_unreachable ();
20559 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20561 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20562 src
, pc_rtx
, pc_rtx
);
20564 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20568 /* Expand conditional increment or decrement using adb/sbb instructions.
20569 The default case using setcc followed by the conditional move can be
20570 done by generic code. */
20572 ix86_expand_int_addcc (rtx operands
[])
20574 enum rtx_code code
= GET_CODE (operands
[1]);
20576 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20578 rtx val
= const0_rtx
;
20579 bool fpcmp
= false;
20580 enum machine_mode mode
;
20581 rtx op0
= XEXP (operands
[1], 0);
20582 rtx op1
= XEXP (operands
[1], 1);
20584 if (operands
[3] != const1_rtx
20585 && operands
[3] != constm1_rtx
)
20587 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20589 code
= GET_CODE (compare_op
);
20591 flags
= XEXP (compare_op
, 0);
20593 if (GET_MODE (flags
) == CCFPmode
20594 || GET_MODE (flags
) == CCFPUmode
)
20597 code
= ix86_fp_compare_code_to_integer (code
);
20604 PUT_CODE (compare_op
,
20605 reverse_condition_maybe_unordered
20606 (GET_CODE (compare_op
)));
20608 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20611 mode
= GET_MODE (operands
[0]);
20613 /* Construct either adc or sbb insn. */
20614 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20619 insn
= gen_subqi3_carry
;
20622 insn
= gen_subhi3_carry
;
20625 insn
= gen_subsi3_carry
;
20628 insn
= gen_subdi3_carry
;
20631 gcc_unreachable ();
20639 insn
= gen_addqi3_carry
;
20642 insn
= gen_addhi3_carry
;
20645 insn
= gen_addsi3_carry
;
20648 insn
= gen_adddi3_carry
;
20651 gcc_unreachable ();
20654 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20660 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20661 but works for floating pointer parameters and nonoffsetable memories.
20662 For pushes, it returns just stack offsets; the values will be saved
20663 in the right order. Maximally three parts are generated. */
20666 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20671 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20673 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20675 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20676 gcc_assert (size
>= 2 && size
<= 4);
20678 /* Optimize constant pool reference to immediates. This is used by fp
20679 moves, that force all constants to memory to allow combining. */
20680 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20682 rtx tmp
= maybe_get_pool_constant (operand
);
20687 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20689 /* The only non-offsetable memories we handle are pushes. */
20690 int ok
= push_operand (operand
, VOIDmode
);
20694 operand
= copy_rtx (operand
);
20695 PUT_MODE (operand
, word_mode
);
20696 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20700 if (GET_CODE (operand
) == CONST_VECTOR
)
20702 enum machine_mode imode
= int_mode_for_mode (mode
);
20703 /* Caution: if we looked through a constant pool memory above,
20704 the operand may actually have a different mode now. That's
20705 ok, since we want to pun this all the way back to an integer. */
20706 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20707 gcc_assert (operand
!= NULL
);
20713 if (mode
== DImode
)
20714 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20719 if (REG_P (operand
))
20721 gcc_assert (reload_completed
);
20722 for (i
= 0; i
< size
; i
++)
20723 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20725 else if (offsettable_memref_p (operand
))
20727 operand
= adjust_address (operand
, SImode
, 0);
20728 parts
[0] = operand
;
20729 for (i
= 1; i
< size
; i
++)
20730 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20732 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20737 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20741 real_to_target (l
, &r
, mode
);
20742 parts
[3] = gen_int_mode (l
[3], SImode
);
20743 parts
[2] = gen_int_mode (l
[2], SImode
);
20746 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
20747 parts
[2] = gen_int_mode (l
[2], SImode
);
20750 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20753 gcc_unreachable ();
20755 parts
[1] = gen_int_mode (l
[1], SImode
);
20756 parts
[0] = gen_int_mode (l
[0], SImode
);
20759 gcc_unreachable ();
20764 if (mode
== TImode
)
20765 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20766 if (mode
== XFmode
|| mode
== TFmode
)
20768 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20769 if (REG_P (operand
))
20771 gcc_assert (reload_completed
);
20772 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20773 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20775 else if (offsettable_memref_p (operand
))
20777 operand
= adjust_address (operand
, DImode
, 0);
20778 parts
[0] = operand
;
20779 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20781 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20786 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20787 real_to_target (l
, &r
, mode
);
20789 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20790 if (HOST_BITS_PER_WIDE_INT
>= 64)
20793 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20794 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
20797 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
20799 if (upper_mode
== SImode
)
20800 parts
[1] = gen_int_mode (l
[2], SImode
);
20801 else if (HOST_BITS_PER_WIDE_INT
>= 64)
20804 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20805 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
20808 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
20811 gcc_unreachable ();
20818 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20819 Return false when normal moves are needed; true when all required
20820 insns have been emitted. Operands 2-4 contain the input values
20821 int the correct order; operands 5-7 contain the output values. */
20824 ix86_split_long_move (rtx operands
[])
20829 int collisions
= 0;
20830 enum machine_mode mode
= GET_MODE (operands
[0]);
20831 bool collisionparts
[4];
20833 /* The DFmode expanders may ask us to move double.
20834 For 64bit target this is single move. By hiding the fact
20835 here we simplify i386.md splitters. */
20836 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
20838 /* Optimize constant pool reference to immediates. This is used by
20839 fp moves, that force all constants to memory to allow combining. */
20841 if (MEM_P (operands
[1])
20842 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
20843 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
20844 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
20845 if (push_operand (operands
[0], VOIDmode
))
20847 operands
[0] = copy_rtx (operands
[0]);
20848 PUT_MODE (operands
[0], word_mode
);
20851 operands
[0] = gen_lowpart (DImode
, operands
[0]);
20852 operands
[1] = gen_lowpart (DImode
, operands
[1]);
20853 emit_move_insn (operands
[0], operands
[1]);
20857 /* The only non-offsettable memory we handle is push. */
20858 if (push_operand (operands
[0], VOIDmode
))
20861 gcc_assert (!MEM_P (operands
[0])
20862 || offsettable_memref_p (operands
[0]));
20864 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
20865 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
20867 /* When emitting push, take care for source operands on the stack. */
20868 if (push
&& MEM_P (operands
[1])
20869 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
20871 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
20873 /* Compensate for the stack decrement by 4. */
20874 if (!TARGET_64BIT
&& nparts
== 3
20875 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
20876 src_base
= plus_constant (Pmode
, src_base
, 4);
20878 /* src_base refers to the stack pointer and is
20879 automatically decreased by emitted push. */
20880 for (i
= 0; i
< nparts
; i
++)
20881 part
[1][i
] = change_address (part
[1][i
],
20882 GET_MODE (part
[1][i
]), src_base
);
20885 /* We need to do copy in the right order in case an address register
20886 of the source overlaps the destination. */
20887 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
20891 for (i
= 0; i
< nparts
; i
++)
20894 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
20895 if (collisionparts
[i
])
20899 /* Collision in the middle part can be handled by reordering. */
20900 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
20902 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20903 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20905 else if (collisions
== 1
20907 && (collisionparts
[1] || collisionparts
[2]))
20909 if (collisionparts
[1])
20911 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20912 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20916 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
20917 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
20921 /* If there are more collisions, we can't handle it by reordering.
20922 Do an lea to the last part and use only one colliding move. */
20923 else if (collisions
> 1)
20929 base
= part
[0][nparts
- 1];
20931 /* Handle the case when the last part isn't valid for lea.
20932 Happens in 64-bit mode storing the 12-byte XFmode. */
20933 if (GET_MODE (base
) != Pmode
)
20934 base
= gen_rtx_REG (Pmode
, REGNO (base
));
20936 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
20937 part
[1][0] = replace_equiv_address (part
[1][0], base
);
20938 for (i
= 1; i
< nparts
; i
++)
20940 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
20941 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
20952 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
20953 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
20954 stack_pointer_rtx
, GEN_INT (-4)));
20955 emit_move_insn (part
[0][2], part
[1][2]);
20957 else if (nparts
== 4)
20959 emit_move_insn (part
[0][3], part
[1][3]);
20960 emit_move_insn (part
[0][2], part
[1][2]);
20965 /* In 64bit mode we don't have 32bit push available. In case this is
20966 register, it is OK - we will just use larger counterpart. We also
20967 retype memory - these comes from attempt to avoid REX prefix on
20968 moving of second half of TFmode value. */
20969 if (GET_MODE (part
[1][1]) == SImode
)
20971 switch (GET_CODE (part
[1][1]))
20974 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
20978 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
20982 gcc_unreachable ();
20985 if (GET_MODE (part
[1][0]) == SImode
)
20986 part
[1][0] = part
[1][1];
20989 emit_move_insn (part
[0][1], part
[1][1]);
20990 emit_move_insn (part
[0][0], part
[1][0]);
20994 /* Choose correct order to not overwrite the source before it is copied. */
20995 if ((REG_P (part
[0][0])
20996 && REG_P (part
[1][1])
20997 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
20999 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21001 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21003 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21005 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21007 operands
[2 + i
] = part
[0][j
];
21008 operands
[6 + i
] = part
[1][j
];
21013 for (i
= 0; i
< nparts
; i
++)
21015 operands
[2 + i
] = part
[0][i
];
21016 operands
[6 + i
] = part
[1][i
];
21020 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21021 if (optimize_insn_for_size_p ())
21023 for (j
= 0; j
< nparts
- 1; j
++)
21024 if (CONST_INT_P (operands
[6 + j
])
21025 && operands
[6 + j
] != const0_rtx
21026 && REG_P (operands
[2 + j
]))
21027 for (i
= j
; i
< nparts
- 1; i
++)
21028 if (CONST_INT_P (operands
[7 + i
])
21029 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21030 operands
[7 + i
] = operands
[2 + j
];
21033 for (i
= 0; i
< nparts
; i
++)
21034 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21039 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21040 left shift by a constant, either using a single shift or
21041 a sequence of add instructions. */
21044 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21046 rtx (*insn
)(rtx
, rtx
, rtx
);
21049 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21050 && !optimize_insn_for_size_p ()))
21052 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21053 while (count
-- > 0)
21054 emit_insn (insn (operand
, operand
, operand
));
21058 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21059 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21064 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21066 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21067 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21068 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21070 rtx low
[2], high
[2];
21073 if (CONST_INT_P (operands
[2]))
21075 split_double_mode (mode
, operands
, 2, low
, high
);
21076 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21078 if (count
>= half_width
)
21080 emit_move_insn (high
[0], low
[1]);
21081 emit_move_insn (low
[0], const0_rtx
);
21083 if (count
> half_width
)
21084 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21088 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21090 if (!rtx_equal_p (operands
[0], operands
[1]))
21091 emit_move_insn (operands
[0], operands
[1]);
21093 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21094 ix86_expand_ashl_const (low
[0], count
, mode
);
21099 split_double_mode (mode
, operands
, 1, low
, high
);
21101 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21103 if (operands
[1] == const1_rtx
)
21105 /* Assuming we've chosen a QImode capable registers, then 1 << N
21106 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21107 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21109 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21111 ix86_expand_clear (low
[0]);
21112 ix86_expand_clear (high
[0]);
21113 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21115 d
= gen_lowpart (QImode
, low
[0]);
21116 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21117 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21118 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21120 d
= gen_lowpart (QImode
, high
[0]);
21121 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21122 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21123 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21126 /* Otherwise, we can get the same results by manually performing
21127 a bit extract operation on bit 5/6, and then performing the two
21128 shifts. The two methods of getting 0/1 into low/high are exactly
21129 the same size. Avoiding the shift in the bit extract case helps
21130 pentium4 a bit; no one else seems to care much either way. */
21133 enum machine_mode half_mode
;
21134 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21135 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21136 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21137 HOST_WIDE_INT bits
;
21140 if (mode
== DImode
)
21142 half_mode
= SImode
;
21143 gen_lshr3
= gen_lshrsi3
;
21144 gen_and3
= gen_andsi3
;
21145 gen_xor3
= gen_xorsi3
;
21150 half_mode
= DImode
;
21151 gen_lshr3
= gen_lshrdi3
;
21152 gen_and3
= gen_anddi3
;
21153 gen_xor3
= gen_xordi3
;
21157 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21158 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21160 x
= gen_lowpart (half_mode
, operands
[2]);
21161 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21163 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21164 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21165 emit_move_insn (low
[0], high
[0]);
21166 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21169 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21170 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21174 if (operands
[1] == constm1_rtx
)
21176 /* For -1 << N, we can avoid the shld instruction, because we
21177 know that we're shifting 0...31/63 ones into a -1. */
21178 emit_move_insn (low
[0], constm1_rtx
);
21179 if (optimize_insn_for_size_p ())
21180 emit_move_insn (high
[0], low
[0]);
21182 emit_move_insn (high
[0], constm1_rtx
);
21186 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21188 if (!rtx_equal_p (operands
[0], operands
[1]))
21189 emit_move_insn (operands
[0], operands
[1]);
21191 split_double_mode (mode
, operands
, 1, low
, high
);
21192 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21195 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21197 if (TARGET_CMOVE
&& scratch
)
21199 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21200 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21202 ix86_expand_clear (scratch
);
21203 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21207 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21208 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21210 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21215 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21217 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21218 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21219 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21220 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21222 rtx low
[2], high
[2];
21225 if (CONST_INT_P (operands
[2]))
21227 split_double_mode (mode
, operands
, 2, low
, high
);
21228 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21230 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21232 emit_move_insn (high
[0], high
[1]);
21233 emit_insn (gen_ashr3 (high
[0], high
[0],
21234 GEN_INT (half_width
- 1)));
21235 emit_move_insn (low
[0], high
[0]);
21238 else if (count
>= half_width
)
21240 emit_move_insn (low
[0], high
[1]);
21241 emit_move_insn (high
[0], low
[0]);
21242 emit_insn (gen_ashr3 (high
[0], high
[0],
21243 GEN_INT (half_width
- 1)));
21245 if (count
> half_width
)
21246 emit_insn (gen_ashr3 (low
[0], low
[0],
21247 GEN_INT (count
- half_width
)));
21251 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21253 if (!rtx_equal_p (operands
[0], operands
[1]))
21254 emit_move_insn (operands
[0], operands
[1]);
21256 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21257 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21262 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21264 if (!rtx_equal_p (operands
[0], operands
[1]))
21265 emit_move_insn (operands
[0], operands
[1]);
21267 split_double_mode (mode
, operands
, 1, low
, high
);
21269 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21270 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21272 if (TARGET_CMOVE
&& scratch
)
21274 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21275 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21277 emit_move_insn (scratch
, high
[0]);
21278 emit_insn (gen_ashr3 (scratch
, scratch
,
21279 GEN_INT (half_width
- 1)));
21280 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21285 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21286 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21288 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21294 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21296 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21297 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21298 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21299 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21301 rtx low
[2], high
[2];
21304 if (CONST_INT_P (operands
[2]))
21306 split_double_mode (mode
, operands
, 2, low
, high
);
21307 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21309 if (count
>= half_width
)
21311 emit_move_insn (low
[0], high
[1]);
21312 ix86_expand_clear (high
[0]);
21314 if (count
> half_width
)
21315 emit_insn (gen_lshr3 (low
[0], low
[0],
21316 GEN_INT (count
- half_width
)));
21320 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21322 if (!rtx_equal_p (operands
[0], operands
[1]))
21323 emit_move_insn (operands
[0], operands
[1]);
21325 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21326 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21331 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21333 if (!rtx_equal_p (operands
[0], operands
[1]))
21334 emit_move_insn (operands
[0], operands
[1]);
21336 split_double_mode (mode
, operands
, 1, low
, high
);
21338 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21339 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21341 if (TARGET_CMOVE
&& scratch
)
21343 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21344 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21346 ix86_expand_clear (scratch
);
21347 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21352 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21353 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21355 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21360 /* Predict just emitted jump instruction to be taken with probability PROB. */
21362 predict_jump (int prob
)
21364 rtx insn
= get_last_insn ();
21365 gcc_assert (JUMP_P (insn
));
21366 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21369 /* Helper function for the string operations below. Dest VARIABLE whether
21370 it is aligned to VALUE bytes. If true, jump to the label. */
21372 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21374 rtx label
= gen_label_rtx ();
21375 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21376 if (GET_MODE (variable
) == DImode
)
21377 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21379 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21380 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21383 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21385 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21389 /* Adjust COUNTER by the VALUE. */
21391 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21393 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21394 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21396 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21399 /* Zero extend possibly SImode EXP to Pmode register. */
21401 ix86_zero_extend_to_Pmode (rtx exp
)
21403 if (GET_MODE (exp
) != Pmode
)
21404 exp
= convert_to_mode (Pmode
, exp
, 1);
21405 return force_reg (Pmode
, exp
);
21408 /* Divide COUNTREG by SCALE. */
21410 scale_counter (rtx countreg
, int scale
)
21416 if (CONST_INT_P (countreg
))
21417 return GEN_INT (INTVAL (countreg
) / scale
);
21418 gcc_assert (REG_P (countreg
));
21420 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21421 GEN_INT (exact_log2 (scale
)),
21422 NULL
, 1, OPTAB_DIRECT
);
21426 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21427 DImode for constant loop counts. */
21429 static enum machine_mode
21430 counter_mode (rtx count_exp
)
21432 if (GET_MODE (count_exp
) != VOIDmode
)
21433 return GET_MODE (count_exp
);
21434 if (!CONST_INT_P (count_exp
))
21436 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21441 /* When SRCPTR is non-NULL, output simple loop to move memory
21442 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21443 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21444 equivalent loop to set memory by VALUE (supposed to be in MODE).
21446 The size is rounded down to whole number of chunk size moved at once.
21447 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21451 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21452 rtx destptr
, rtx srcptr
, rtx value
,
21453 rtx count
, enum machine_mode mode
, int unroll
,
21456 rtx out_label
, top_label
, iter
, tmp
;
21457 enum machine_mode iter_mode
= counter_mode (count
);
21458 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21459 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21465 top_label
= gen_label_rtx ();
21466 out_label
= gen_label_rtx ();
21467 iter
= gen_reg_rtx (iter_mode
);
21469 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21470 NULL
, 1, OPTAB_DIRECT
);
21471 /* Those two should combine. */
21472 if (piece_size
== const1_rtx
)
21474 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21476 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21478 emit_move_insn (iter
, const0_rtx
);
21480 emit_label (top_label
);
21482 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21483 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21484 destmem
= change_address (destmem
, mode
, x_addr
);
21488 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21489 srcmem
= change_address (srcmem
, mode
, y_addr
);
21491 /* When unrolling for chips that reorder memory reads and writes,
21492 we can save registers by using single temporary.
21493 Also using 4 temporaries is overkill in 32bit mode. */
21494 if (!TARGET_64BIT
&& 0)
21496 for (i
= 0; i
< unroll
; i
++)
21501 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21503 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21505 emit_move_insn (destmem
, srcmem
);
21511 gcc_assert (unroll
<= 4);
21512 for (i
= 0; i
< unroll
; i
++)
21514 tmpreg
[i
] = gen_reg_rtx (mode
);
21518 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21520 emit_move_insn (tmpreg
[i
], srcmem
);
21522 for (i
= 0; i
< unroll
; i
++)
21527 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21529 emit_move_insn (destmem
, tmpreg
[i
]);
21534 for (i
= 0; i
< unroll
; i
++)
21538 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21539 emit_move_insn (destmem
, value
);
21542 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21543 true, OPTAB_LIB_WIDEN
);
21545 emit_move_insn (iter
, tmp
);
21547 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21549 if (expected_size
!= -1)
21551 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21552 if (expected_size
== 0)
21554 else if (expected_size
> REG_BR_PROB_BASE
)
21555 predict_jump (REG_BR_PROB_BASE
- 1);
21557 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21560 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21561 iter
= ix86_zero_extend_to_Pmode (iter
);
21562 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21563 true, OPTAB_LIB_WIDEN
);
21564 if (tmp
!= destptr
)
21565 emit_move_insn (destptr
, tmp
);
21568 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21569 true, OPTAB_LIB_WIDEN
);
21571 emit_move_insn (srcptr
, tmp
);
21573 emit_label (out_label
);
21576 /* Output "rep; mov" instruction.
21577 Arguments have same meaning as for previous function */
21579 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21580 rtx destptr
, rtx srcptr
,
21582 enum machine_mode mode
)
21587 HOST_WIDE_INT rounded_count
;
21589 /* If the size is known, it is shorter to use rep movs. */
21590 if (mode
== QImode
&& CONST_INT_P (count
)
21591 && !(INTVAL (count
) & 3))
21594 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21595 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21596 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21597 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21598 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21599 if (mode
!= QImode
)
21601 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21602 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21603 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21604 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21605 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21606 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21610 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21611 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21613 if (CONST_INT_P (count
))
21615 rounded_count
= (INTVAL (count
)
21616 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21617 destmem
= shallow_copy_rtx (destmem
);
21618 srcmem
= shallow_copy_rtx (srcmem
);
21619 set_mem_size (destmem
, rounded_count
);
21620 set_mem_size (srcmem
, rounded_count
);
21624 if (MEM_SIZE_KNOWN_P (destmem
))
21625 clear_mem_size (destmem
);
21626 if (MEM_SIZE_KNOWN_P (srcmem
))
21627 clear_mem_size (srcmem
);
21629 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21633 /* Output "rep; stos" instruction.
21634 Arguments have same meaning as for previous function */
21636 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21637 rtx count
, enum machine_mode mode
,
21642 HOST_WIDE_INT rounded_count
;
21644 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21645 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21646 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21647 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21648 if (mode
!= QImode
)
21650 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21651 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21652 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21655 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21656 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21658 rounded_count
= (INTVAL (count
)
21659 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21660 destmem
= shallow_copy_rtx (destmem
);
21661 set_mem_size (destmem
, rounded_count
);
21663 else if (MEM_SIZE_KNOWN_P (destmem
))
21664 clear_mem_size (destmem
);
21665 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21669 emit_strmov (rtx destmem
, rtx srcmem
,
21670 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21672 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21673 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21674 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21677 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21679 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21680 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21683 if (CONST_INT_P (count
))
21685 HOST_WIDE_INT countval
= INTVAL (count
);
21688 if ((countval
& 0x10) && max_size
> 16)
21692 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21693 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21696 gcc_unreachable ();
21699 if ((countval
& 0x08) && max_size
> 8)
21702 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21705 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21706 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21710 if ((countval
& 0x04) && max_size
> 4)
21712 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21715 if ((countval
& 0x02) && max_size
> 2)
21717 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21720 if ((countval
& 0x01) && max_size
> 1)
21722 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21729 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21730 count
, 1, OPTAB_DIRECT
);
21731 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
21732 count
, QImode
, 1, 4);
21736 /* When there are stringops, we can cheaply increase dest and src pointers.
21737 Otherwise we save code size by maintaining offset (zero is readily
21738 available from preceding rep operation) and using x86 addressing modes.
21740 if (TARGET_SINGLE_STRINGOP
)
21744 rtx label
= ix86_expand_aligntest (count
, 4, true);
21745 src
= change_address (srcmem
, SImode
, srcptr
);
21746 dest
= change_address (destmem
, SImode
, destptr
);
21747 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21748 emit_label (label
);
21749 LABEL_NUSES (label
) = 1;
21753 rtx label
= ix86_expand_aligntest (count
, 2, true);
21754 src
= change_address (srcmem
, HImode
, srcptr
);
21755 dest
= change_address (destmem
, HImode
, destptr
);
21756 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21757 emit_label (label
);
21758 LABEL_NUSES (label
) = 1;
21762 rtx label
= ix86_expand_aligntest (count
, 1, true);
21763 src
= change_address (srcmem
, QImode
, srcptr
);
21764 dest
= change_address (destmem
, QImode
, destptr
);
21765 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21766 emit_label (label
);
21767 LABEL_NUSES (label
) = 1;
21772 rtx offset
= force_reg (Pmode
, const0_rtx
);
21777 rtx label
= ix86_expand_aligntest (count
, 4, true);
21778 src
= change_address (srcmem
, SImode
, srcptr
);
21779 dest
= change_address (destmem
, SImode
, destptr
);
21780 emit_move_insn (dest
, src
);
21781 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21782 true, OPTAB_LIB_WIDEN
);
21784 emit_move_insn (offset
, tmp
);
21785 emit_label (label
);
21786 LABEL_NUSES (label
) = 1;
21790 rtx label
= ix86_expand_aligntest (count
, 2, true);
21791 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21792 src
= change_address (srcmem
, HImode
, tmp
);
21793 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21794 dest
= change_address (destmem
, HImode
, tmp
);
21795 emit_move_insn (dest
, src
);
21796 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
21797 true, OPTAB_LIB_WIDEN
);
21799 emit_move_insn (offset
, tmp
);
21800 emit_label (label
);
21801 LABEL_NUSES (label
) = 1;
21805 rtx label
= ix86_expand_aligntest (count
, 1, true);
21806 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21807 src
= change_address (srcmem
, QImode
, tmp
);
21808 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21809 dest
= change_address (destmem
, QImode
, tmp
);
21810 emit_move_insn (dest
, src
);
21811 emit_label (label
);
21812 LABEL_NUSES (label
) = 1;
21817 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21819 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
21820 rtx count
, int max_size
)
21823 expand_simple_binop (counter_mode (count
), AND
, count
,
21824 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
21825 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
21826 gen_lowpart (QImode
, value
), count
, QImode
,
21830 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21832 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
21836 if (CONST_INT_P (count
))
21838 HOST_WIDE_INT countval
= INTVAL (count
);
21841 if ((countval
& 0x10) && max_size
> 16)
21845 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21846 emit_insn (gen_strset (destptr
, dest
, value
));
21847 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
21848 emit_insn (gen_strset (destptr
, dest
, value
));
21851 gcc_unreachable ();
21854 if ((countval
& 0x08) && max_size
> 8)
21858 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21859 emit_insn (gen_strset (destptr
, dest
, value
));
21863 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21864 emit_insn (gen_strset (destptr
, dest
, value
));
21865 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
21866 emit_insn (gen_strset (destptr
, dest
, value
));
21870 if ((countval
& 0x04) && max_size
> 4)
21872 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21873 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
21876 if ((countval
& 0x02) && max_size
> 2)
21878 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
21879 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
21882 if ((countval
& 0x01) && max_size
> 1)
21884 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
21885 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
21892 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
21897 rtx label
= ix86_expand_aligntest (count
, 16, true);
21900 dest
= change_address (destmem
, DImode
, destptr
);
21901 emit_insn (gen_strset (destptr
, dest
, value
));
21902 emit_insn (gen_strset (destptr
, dest
, value
));
21906 dest
= change_address (destmem
, SImode
, destptr
);
21907 emit_insn (gen_strset (destptr
, dest
, value
));
21908 emit_insn (gen_strset (destptr
, dest
, value
));
21909 emit_insn (gen_strset (destptr
, dest
, value
));
21910 emit_insn (gen_strset (destptr
, dest
, value
));
21912 emit_label (label
);
21913 LABEL_NUSES (label
) = 1;
21917 rtx label
= ix86_expand_aligntest (count
, 8, true);
21920 dest
= change_address (destmem
, DImode
, destptr
);
21921 emit_insn (gen_strset (destptr
, dest
, value
));
21925 dest
= change_address (destmem
, SImode
, destptr
);
21926 emit_insn (gen_strset (destptr
, dest
, value
));
21927 emit_insn (gen_strset (destptr
, dest
, value
));
21929 emit_label (label
);
21930 LABEL_NUSES (label
) = 1;
21934 rtx label
= ix86_expand_aligntest (count
, 4, true);
21935 dest
= change_address (destmem
, SImode
, destptr
);
21936 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
21937 emit_label (label
);
21938 LABEL_NUSES (label
) = 1;
21942 rtx label
= ix86_expand_aligntest (count
, 2, true);
21943 dest
= change_address (destmem
, HImode
, destptr
);
21944 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
21945 emit_label (label
);
21946 LABEL_NUSES (label
) = 1;
21950 rtx label
= ix86_expand_aligntest (count
, 1, true);
21951 dest
= change_address (destmem
, QImode
, destptr
);
21952 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
21953 emit_label (label
);
21954 LABEL_NUSES (label
) = 1;
21958 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
21959 DESIRED_ALIGNMENT. */
21961 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
21962 rtx destptr
, rtx srcptr
, rtx count
,
21963 int align
, int desired_alignment
)
21965 if (align
<= 1 && desired_alignment
> 1)
21967 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21968 srcmem
= change_address (srcmem
, QImode
, srcptr
);
21969 destmem
= change_address (destmem
, QImode
, destptr
);
21970 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21971 ix86_adjust_counter (count
, 1);
21972 emit_label (label
);
21973 LABEL_NUSES (label
) = 1;
21975 if (align
<= 2 && desired_alignment
> 2)
21977 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21978 srcmem
= change_address (srcmem
, HImode
, srcptr
);
21979 destmem
= change_address (destmem
, HImode
, destptr
);
21980 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21981 ix86_adjust_counter (count
, 2);
21982 emit_label (label
);
21983 LABEL_NUSES (label
) = 1;
21985 if (align
<= 4 && desired_alignment
> 4)
21987 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
21988 srcmem
= change_address (srcmem
, SImode
, srcptr
);
21989 destmem
= change_address (destmem
, SImode
, destptr
);
21990 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21991 ix86_adjust_counter (count
, 4);
21992 emit_label (label
);
21993 LABEL_NUSES (label
) = 1;
21995 gcc_assert (desired_alignment
<= 8);
21998 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
21999 ALIGN_BYTES is how many bytes need to be copied. */
22001 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22002 int desired_align
, int align_bytes
)
22005 rtx orig_dst
= dst
;
22006 rtx orig_src
= src
;
22008 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22009 if (src_align_bytes
>= 0)
22010 src_align_bytes
= desired_align
- src_align_bytes
;
22011 if (align_bytes
& 1)
22013 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22014 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
22016 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22018 if (align_bytes
& 2)
22020 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22021 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
22022 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22023 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22024 if (src_align_bytes
>= 0
22025 && (src_align_bytes
& 1) == (align_bytes
& 1)
22026 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
22027 set_mem_align (src
, 2 * BITS_PER_UNIT
);
22029 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22031 if (align_bytes
& 4)
22033 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22034 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
22035 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22036 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22037 if (src_align_bytes
>= 0)
22039 unsigned int src_align
= 0;
22040 if ((src_align_bytes
& 3) == (align_bytes
& 3))
22042 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22044 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22045 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22048 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22050 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22051 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
22052 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22053 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22054 if (src_align_bytes
>= 0)
22056 unsigned int src_align
= 0;
22057 if ((src_align_bytes
& 7) == (align_bytes
& 7))
22059 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
22061 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22063 if (src_align
> (unsigned int) desired_align
)
22064 src_align
= desired_align
;
22065 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22066 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22068 if (MEM_SIZE_KNOWN_P (orig_dst
))
22069 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22070 if (MEM_SIZE_KNOWN_P (orig_src
))
22071 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22076 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22077 DESIRED_ALIGNMENT. */
22079 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22080 int align
, int desired_alignment
)
22082 if (align
<= 1 && desired_alignment
> 1)
22084 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22085 destmem
= change_address (destmem
, QImode
, destptr
);
22086 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22087 ix86_adjust_counter (count
, 1);
22088 emit_label (label
);
22089 LABEL_NUSES (label
) = 1;
22091 if (align
<= 2 && desired_alignment
> 2)
22093 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22094 destmem
= change_address (destmem
, HImode
, destptr
);
22095 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22096 ix86_adjust_counter (count
, 2);
22097 emit_label (label
);
22098 LABEL_NUSES (label
) = 1;
22100 if (align
<= 4 && desired_alignment
> 4)
22102 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22103 destmem
= change_address (destmem
, SImode
, destptr
);
22104 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22105 ix86_adjust_counter (count
, 4);
22106 emit_label (label
);
22107 LABEL_NUSES (label
) = 1;
22109 gcc_assert (desired_alignment
<= 8);
22112 /* Set enough from DST to align DST known to by aligned by ALIGN to
22113 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22115 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22116 int desired_align
, int align_bytes
)
22119 rtx orig_dst
= dst
;
22120 if (align_bytes
& 1)
22122 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22124 emit_insn (gen_strset (destreg
, dst
,
22125 gen_lowpart (QImode
, value
)));
22127 if (align_bytes
& 2)
22129 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22130 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22131 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22133 emit_insn (gen_strset (destreg
, dst
,
22134 gen_lowpart (HImode
, value
)));
22136 if (align_bytes
& 4)
22138 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22139 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22140 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22142 emit_insn (gen_strset (destreg
, dst
,
22143 gen_lowpart (SImode
, value
)));
22145 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22146 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22147 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22148 if (MEM_SIZE_KNOWN_P (orig_dst
))
22149 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22153 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22154 static enum stringop_alg
22155 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22156 int *dynamic_check
)
22158 const struct stringop_algs
* algs
;
22159 bool optimize_for_speed
;
22160 /* Algorithms using the rep prefix want at least edi and ecx;
22161 additionally, memset wants eax and memcpy wants esi. Don't
22162 consider such algorithms if the user has appropriated those
22163 registers for their own purposes. */
22164 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22166 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22168 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22169 || (alg != rep_prefix_1_byte \
22170 && alg != rep_prefix_4_byte \
22171 && alg != rep_prefix_8_byte))
22172 const struct processor_costs
*cost
;
22174 /* Even if the string operation call is cold, we still might spend a lot
22175 of time processing large blocks. */
22176 if (optimize_function_for_size_p (cfun
)
22177 || (optimize_insn_for_size_p ()
22178 && expected_size
!= -1 && expected_size
< 256))
22179 optimize_for_speed
= false;
22181 optimize_for_speed
= true;
22183 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22185 *dynamic_check
= -1;
22187 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22189 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22190 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22191 return ix86_stringop_alg
;
22192 /* rep; movq or rep; movl is the smallest variant. */
22193 else if (!optimize_for_speed
)
22195 if (!count
|| (count
& 3))
22196 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22198 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22200 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22202 else if (expected_size
!= -1 && expected_size
< 4)
22203 return loop_1_byte
;
22204 else if (expected_size
!= -1)
22207 enum stringop_alg alg
= libcall
;
22208 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22210 /* We get here if the algorithms that were not libcall-based
22211 were rep-prefix based and we are unable to use rep prefixes
22212 based on global register usage. Break out of the loop and
22213 use the heuristic below. */
22214 if (algs
->size
[i
].max
== 0)
22216 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22218 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22220 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22222 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22223 last non-libcall inline algorithm. */
22224 if (TARGET_INLINE_ALL_STRINGOPS
)
22226 /* When the current size is best to be copied by a libcall,
22227 but we are still forced to inline, run the heuristic below
22228 that will pick code for medium sized blocks. */
22229 if (alg
!= libcall
)
22233 else if (ALG_USABLE_P (candidate
))
22237 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22239 /* When asked to inline the call anyway, try to pick meaningful choice.
22240 We look for maximal size of block that is faster to copy by hand and
22241 take blocks of at most of that size guessing that average size will
22242 be roughly half of the block.
22244 If this turns out to be bad, we might simply specify the preferred
22245 choice in ix86_costs. */
22246 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22247 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22250 enum stringop_alg alg
;
22252 bool any_alg_usable_p
= true;
22254 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22256 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22257 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22259 if (candidate
!= libcall
&& candidate
22260 && ALG_USABLE_P (candidate
))
22261 max
= algs
->size
[i
].max
;
22263 /* If there aren't any usable algorithms, then recursing on
22264 smaller sizes isn't going to find anything. Just return the
22265 simple byte-at-a-time copy loop. */
22266 if (!any_alg_usable_p
)
22268 /* Pick something reasonable. */
22269 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22270 *dynamic_check
= 128;
22271 return loop_1_byte
;
22275 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
22276 gcc_assert (*dynamic_check
== -1);
22277 gcc_assert (alg
!= libcall
);
22278 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22279 *dynamic_check
= max
;
22282 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22283 #undef ALG_USABLE_P
22286 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22287 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22289 decide_alignment (int align
,
22290 enum stringop_alg alg
,
22293 int desired_align
= 0;
22297 gcc_unreachable ();
22299 case unrolled_loop
:
22300 desired_align
= GET_MODE_SIZE (Pmode
);
22302 case rep_prefix_8_byte
:
22305 case rep_prefix_4_byte
:
22306 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22307 copying whole cacheline at once. */
22308 if (TARGET_PENTIUMPRO
)
22313 case rep_prefix_1_byte
:
22314 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22315 copying whole cacheline at once. */
22316 if (TARGET_PENTIUMPRO
)
22330 if (desired_align
< align
)
22331 desired_align
= align
;
22332 if (expected_size
!= -1 && expected_size
< 4)
22333 desired_align
= align
;
22334 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  Starts at 64, the
   smallest epilogue chunk size of interest.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 64;
  while (ret < val)
    ret *= 2;
  return ret;
}
22347 /* Expand string move (memcpy) operation. Use i386 string operations
22348 when profitable. expand_setmem contains similar code. The code
22349 depends upon architecture, block size and alignment, but always has
22350 the same overall structure:
22352 1) Prologue guard: Conditional that jumps up to epilogues for small
22353 blocks that can be handled by epilogue alone. This is faster
22354 but also needed for correctness, since prologue assume the block
22355 is larger than the desired alignment.
22357 Optional dynamic check for size and libcall for large
22358 blocks is emitted here too, with -minline-stringops-dynamically.
22360 2) Prologue: copy first few bytes in order to get destination
22361 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22362 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22363 copied. We emit either a jump tree on power of two sized
22364 blocks, or a byte loop.
22366 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22367 with specified algorithm.
22369 4) Epilogue: code copying tail of the block that is too small to be
22370 handled by main body (or up to size guarded by prologue guard). */
22373 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22374 rtx expected_align_exp
, rtx expected_size_exp
)
22380 rtx jump_around_label
= NULL
;
22381 HOST_WIDE_INT align
= 1;
22382 unsigned HOST_WIDE_INT count
= 0;
22383 HOST_WIDE_INT expected_size
= -1;
22384 int size_needed
= 0, epilogue_size_needed
;
22385 int desired_align
= 0, align_bytes
= 0;
22386 enum stringop_alg alg
;
22388 bool need_zero_guard
= false;
22390 if (CONST_INT_P (align_exp
))
22391 align
= INTVAL (align_exp
);
22392 /* i386 can do misaligned access on reasonably increased cost. */
22393 if (CONST_INT_P (expected_align_exp
)
22394 && INTVAL (expected_align_exp
) > align
)
22395 align
= INTVAL (expected_align_exp
);
22396 /* ALIGN is the minimum of destination and source alignment, but we care here
22397 just about destination alignment. */
22398 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22399 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22401 if (CONST_INT_P (count_exp
))
22402 count
= expected_size
= INTVAL (count_exp
);
22403 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22404 expected_size
= INTVAL (expected_size_exp
);
22406 /* Make sure we don't need to care about overflow later on. */
22407 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22410 /* Step 0: Decide on preferred algorithm, desired alignment and
22411 size of chunks to be copied by main loop. */
22413 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
22414 desired_align
= decide_alignment (align
, alg
, expected_size
);
22416 if (!TARGET_ALIGN_STRINGOPS
)
22417 align
= desired_align
;
22419 if (alg
== libcall
)
22421 gcc_assert (alg
!= no_stringop
);
22423 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22424 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22425 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22430 gcc_unreachable ();
22432 need_zero_guard
= true;
22433 size_needed
= GET_MODE_SIZE (word_mode
);
22435 case unrolled_loop
:
22436 need_zero_guard
= true;
22437 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22439 case rep_prefix_8_byte
:
22442 case rep_prefix_4_byte
:
22445 case rep_prefix_1_byte
:
22449 need_zero_guard
= true;
22454 epilogue_size_needed
= size_needed
;
22456 /* Step 1: Prologue guard. */
22458 /* Alignment code needs count to be in register. */
22459 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22461 if (INTVAL (count_exp
) > desired_align
22462 && INTVAL (count_exp
) > size_needed
)
22465 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22466 if (align_bytes
<= 0)
22469 align_bytes
= desired_align
- align_bytes
;
22471 if (align_bytes
== 0)
22472 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22474 gcc_assert (desired_align
>= 1 && align
>= 1);
22476 /* Ensure that alignment prologue won't copy past end of block. */
22477 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22479 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22480 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22481 Make sure it is power of 2. */
22482 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22486 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22488 /* If main algorithm works on QImode, no epilogue is needed.
22489 For small sizes just don't align anything. */
22490 if (size_needed
== 1)
22491 desired_align
= align
;
22498 label
= gen_label_rtx ();
22499 emit_cmp_and_jump_insns (count_exp
,
22500 GEN_INT (epilogue_size_needed
),
22501 LTU
, 0, counter_mode (count_exp
), 1, label
);
22502 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22503 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22505 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22509 /* Emit code to decide on runtime whether library call or inline should be
22511 if (dynamic_check
!= -1)
22513 if (CONST_INT_P (count_exp
))
22515 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22517 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22518 count_exp
= const0_rtx
;
22524 rtx hot_label
= gen_label_rtx ();
22525 jump_around_label
= gen_label_rtx ();
22526 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22527 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22528 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22529 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22530 emit_jump (jump_around_label
);
22531 emit_label (hot_label
);
22535 /* Step 2: Alignment prologue. */
22537 if (desired_align
> align
)
22539 if (align_bytes
== 0)
22541 /* Except for the first move in epilogue, we no longer know
22542 constant offset in aliasing info. It don't seems to worth
22543 the pain to maintain it for the first move, so throw away
22545 src
= change_address (src
, BLKmode
, srcreg
);
22546 dst
= change_address (dst
, BLKmode
, destreg
);
22547 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22552 /* If we know how many bytes need to be stored before dst is
22553 sufficiently aligned, maintain aliasing info accurately. */
22554 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22555 desired_align
, align_bytes
);
22556 count_exp
= plus_constant (counter_mode (count_exp
),
22557 count_exp
, -align_bytes
);
22558 count
-= align_bytes
;
22560 if (need_zero_guard
22561 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22562 || (align_bytes
== 0
22563 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22564 + desired_align
- align
))))
22566 /* It is possible that we copied enough so the main loop will not
22568 gcc_assert (size_needed
> 1);
22569 if (label
== NULL_RTX
)
22570 label
= gen_label_rtx ();
22571 emit_cmp_and_jump_insns (count_exp
,
22572 GEN_INT (size_needed
),
22573 LTU
, 0, counter_mode (count_exp
), 1, label
);
22574 if (expected_size
== -1
22575 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22576 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22578 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22581 if (label
&& size_needed
== 1)
22583 emit_label (label
);
22584 LABEL_NUSES (label
) = 1;
22586 epilogue_size_needed
= 1;
22588 else if (label
== NULL_RTX
)
22589 epilogue_size_needed
= size_needed
;
22591 /* Step 3: Main loop. */
22597 gcc_unreachable ();
22599 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22600 count_exp
, QImode
, 1, expected_size
);
22603 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22604 count_exp
, word_mode
, 1, expected_size
);
22606 case unrolled_loop
:
22607 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22608 registers for 4 temporaries anyway. */
22609 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22610 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22613 case rep_prefix_8_byte
:
22614 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22617 case rep_prefix_4_byte
:
22618 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22621 case rep_prefix_1_byte
:
22622 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22626 /* Adjust properly the offset of src and dest memory for aliasing. */
22627 if (CONST_INT_P (count_exp
))
22629 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22630 (count
/ size_needed
) * size_needed
);
22631 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22632 (count
/ size_needed
) * size_needed
);
22636 src
= change_address (src
, BLKmode
, srcreg
);
22637 dst
= change_address (dst
, BLKmode
, destreg
);
22640 /* Step 4: Epilogue to copy the remaining bytes. */
22644 /* When the main loop is done, COUNT_EXP might hold original count,
22645 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22646 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22647 bytes. Compensate if needed. */
22649 if (size_needed
< epilogue_size_needed
)
22652 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22653 GEN_INT (size_needed
- 1), count_exp
, 1,
22655 if (tmp
!= count_exp
)
22656 emit_move_insn (count_exp
, tmp
);
22658 emit_label (label
);
22659 LABEL_NUSES (label
) = 1;
22662 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22663 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22664 epilogue_size_needed
);
22665 if (jump_around_label
)
22666 emit_label (jump_around_label
);
22670 /* Helper function for memcpy. For QImode value 0xXY produce
22671 0xXYXYXYXY of wide specified by MODE. This is essentially
22672 a * 0x10101010, but we can do slightly better than
22673 synth_mult by unwinding the sequence by hand on CPUs with
22676 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22678 enum machine_mode valmode
= GET_MODE (val
);
22680 int nops
= mode
== DImode
? 3 : 2;
22682 gcc_assert (mode
== SImode
|| mode
== DImode
);
22683 if (val
== const0_rtx
)
22684 return copy_to_mode_reg (mode
, const0_rtx
);
22685 if (CONST_INT_P (val
))
22687 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22691 if (mode
== DImode
)
22692 v
|= (v
<< 16) << 16;
22693 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22696 if (valmode
== VOIDmode
)
22698 if (valmode
!= QImode
)
22699 val
= gen_lowpart (QImode
, val
);
22700 if (mode
== QImode
)
22702 if (!TARGET_PARTIAL_REG_STALL
)
22704 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22705 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22706 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22707 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22709 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22710 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22711 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22716 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22718 if (!TARGET_PARTIAL_REG_STALL
)
22719 if (mode
== SImode
)
22720 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22722 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22725 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22726 NULL
, 1, OPTAB_DIRECT
);
22728 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22730 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
22731 NULL
, 1, OPTAB_DIRECT
);
22732 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22733 if (mode
== SImode
)
22735 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
22736 NULL
, 1, OPTAB_DIRECT
);
22737 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22742 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22743 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22744 alignment from ALIGN to DESIRED_ALIGN. */
22746 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22751 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
22752 promoted_val
= promote_duplicated_reg (DImode
, val
);
22753 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22754 promoted_val
= promote_duplicated_reg (SImode
, val
);
22755 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22756 promoted_val
= promote_duplicated_reg (HImode
, val
);
22758 promoted_val
= val
;
22760 return promoted_val
;
22763 /* Expand string clear operation (bzero). Use i386 string operations when
22764 profitable. See expand_movmem comment for explanation of individual
22765 steps performed. */
22767 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22768 rtx expected_align_exp
, rtx expected_size_exp
)
22773 rtx jump_around_label
= NULL
;
22774 HOST_WIDE_INT align
= 1;
22775 unsigned HOST_WIDE_INT count
= 0;
22776 HOST_WIDE_INT expected_size
= -1;
22777 int size_needed
= 0, epilogue_size_needed
;
22778 int desired_align
= 0, align_bytes
= 0;
22779 enum stringop_alg alg
;
22780 rtx promoted_val
= NULL
;
22781 bool force_loopy_epilogue
= false;
22783 bool need_zero_guard
= false;
22785 if (CONST_INT_P (align_exp
))
22786 align
= INTVAL (align_exp
);
22787 /* i386 can do misaligned access on reasonably increased cost. */
22788 if (CONST_INT_P (expected_align_exp
)
22789 && INTVAL (expected_align_exp
) > align
)
22790 align
= INTVAL (expected_align_exp
);
22791 if (CONST_INT_P (count_exp
))
22792 count
= expected_size
= INTVAL (count_exp
);
22793 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22794 expected_size
= INTVAL (expected_size_exp
);
22796 /* Make sure we don't need to care about overflow later on. */
22797 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22800 /* Step 0: Decide on preferred algorithm, desired alignment and
22801 size of chunks to be copied by main loop. */
22803 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
22804 desired_align
= decide_alignment (align
, alg
, expected_size
);
22806 if (!TARGET_ALIGN_STRINGOPS
)
22807 align
= desired_align
;
22809 if (alg
== libcall
)
22811 gcc_assert (alg
!= no_stringop
);
22813 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
22814 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22819 gcc_unreachable ();
22821 need_zero_guard
= true;
22822 size_needed
= GET_MODE_SIZE (word_mode
);
22824 case unrolled_loop
:
22825 need_zero_guard
= true;
22826 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
22828 case rep_prefix_8_byte
:
22831 case rep_prefix_4_byte
:
22834 case rep_prefix_1_byte
:
22838 need_zero_guard
= true;
22842 epilogue_size_needed
= size_needed
;
22844 /* Step 1: Prologue guard. */
22846 /* Alignment code needs count to be in register. */
22847 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22849 if (INTVAL (count_exp
) > desired_align
22850 && INTVAL (count_exp
) > size_needed
)
22853 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22854 if (align_bytes
<= 0)
22857 align_bytes
= desired_align
- align_bytes
;
22859 if (align_bytes
== 0)
22861 enum machine_mode mode
= SImode
;
22862 if (TARGET_64BIT
&& (count
& ~0xffffffff))
22864 count_exp
= force_reg (mode
, count_exp
);
22867 /* Do the cheap promotion to allow better CSE across the
22868 main loop and epilogue (ie one load of the big constant in the
22869 front of all code. */
22870 if (CONST_INT_P (val_exp
))
22871 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22872 desired_align
, align
);
22873 /* Ensure that alignment prologue won't copy past end of block. */
22874 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22876 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22877 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22878 Make sure it is power of 2. */
22879 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22881 /* To improve performance of small blocks, we jump around the VAL
22882 promoting mode. This mean that if the promoted VAL is not constant,
22883 we might not use it in the epilogue and have to use byte
22885 if (epilogue_size_needed
> 2 && !promoted_val
)
22886 force_loopy_epilogue
= true;
22889 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22891 /* If main algorithm works on QImode, no epilogue is needed.
22892 For small sizes just don't align anything. */
22893 if (size_needed
== 1)
22894 desired_align
= align
;
22901 label
= gen_label_rtx ();
22902 emit_cmp_and_jump_insns (count_exp
,
22903 GEN_INT (epilogue_size_needed
),
22904 LTU
, 0, counter_mode (count_exp
), 1, label
);
22905 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
22906 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22908 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22911 if (dynamic_check
!= -1)
22913 rtx hot_label
= gen_label_rtx ();
22914 jump_around_label
= gen_label_rtx ();
22915 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22916 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
22917 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22918 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
22919 emit_jump (jump_around_label
);
22920 emit_label (hot_label
);
22923 /* Step 2: Alignment prologue. */
22925 /* Do the expensive promotion once we branched off the small blocks. */
22927 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22928 desired_align
, align
);
22929 gcc_assert (desired_align
>= 1 && align
>= 1);
22931 if (desired_align
> align
)
22933 if (align_bytes
== 0)
22935 /* Except for the first move in epilogue, we no longer know
22936 constant offset in aliasing info. It don't seems to worth
22937 the pain to maintain it for the first move, so throw away
22939 dst
= change_address (dst
, BLKmode
, destreg
);
22940 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
22945 /* If we know how many bytes need to be stored before dst is
22946 sufficiently aligned, maintain aliasing info accurately. */
22947 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
22948 desired_align
, align_bytes
);
22949 count_exp
= plus_constant (counter_mode (count_exp
),
22950 count_exp
, -align_bytes
);
22951 count
-= align_bytes
;
22953 if (need_zero_guard
22954 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22955 || (align_bytes
== 0
22956 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22957 + desired_align
- align
))))
22959 /* It is possible that we copied enough so the main loop will not
22961 gcc_assert (size_needed
> 1);
22962 if (label
== NULL_RTX
)
22963 label
= gen_label_rtx ();
22964 emit_cmp_and_jump_insns (count_exp
,
22965 GEN_INT (size_needed
),
22966 LTU
, 0, counter_mode (count_exp
), 1, label
);
22967 if (expected_size
== -1
22968 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22969 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22971 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22974 if (label
&& size_needed
== 1)
22976 emit_label (label
);
22977 LABEL_NUSES (label
) = 1;
22979 promoted_val
= val_exp
;
22980 epilogue_size_needed
= 1;
22982 else if (label
== NULL_RTX
)
22983 epilogue_size_needed
= size_needed
;
22985 /* Step 3: Main loop. */
22991 gcc_unreachable ();
22993 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22994 count_exp
, QImode
, 1, expected_size
);
22997 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22998 count_exp
, word_mode
, 1, expected_size
);
23000 case unrolled_loop
:
23001 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23002 count_exp
, word_mode
, 4, expected_size
);
23004 case rep_prefix_8_byte
:
23005 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23008 case rep_prefix_4_byte
:
23009 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23012 case rep_prefix_1_byte
:
23013 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23017 /* Adjust properly the offset of src and dest memory for aliasing. */
23018 if (CONST_INT_P (count_exp
))
23019 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23020 (count
/ size_needed
) * size_needed
);
23022 dst
= change_address (dst
, BLKmode
, destreg
);
23024 /* Step 4: Epilogue to copy the remaining bytes. */
23028 /* When the main loop is done, COUNT_EXP might hold original count,
23029 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23030 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23031 bytes. Compensate if needed. */
23033 if (size_needed
< epilogue_size_needed
)
23036 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23037 GEN_INT (size_needed
- 1), count_exp
, 1,
23039 if (tmp
!= count_exp
)
23040 emit_move_insn (count_exp
, tmp
);
23042 emit_label (label
);
23043 LABEL_NUSES (label
) = 1;
23046 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23048 if (force_loopy_epilogue
)
23049 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23050 epilogue_size_needed
);
23052 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23053 epilogue_size_needed
);
23055 if (jump_around_label
)
23056 emit_label (jump_around_label
);
23060 /* Expand the appropriate insns for doing strlen if not just doing
23063 out = result, initialized with the start address
23064 align_rtx = alignment of the address.
23065 scratch = scratch register, initialized with the startaddress when
23066 not aligned, otherwise undefined
23068 This is just the body. It needs the initializations mentioned above and
23069 some address computing at the end. These things are done in i386.md. */
23072 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23076 rtx align_2_label
= NULL_RTX
;
23077 rtx align_3_label
= NULL_RTX
;
23078 rtx align_4_label
= gen_label_rtx ();
23079 rtx end_0_label
= gen_label_rtx ();
23081 rtx tmpreg
= gen_reg_rtx (SImode
);
23082 rtx scratch
= gen_reg_rtx (SImode
);
23086 if (CONST_INT_P (align_rtx
))
23087 align
= INTVAL (align_rtx
);
23089 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23091 /* Is there a known alignment and is it less than 4? */
23094 rtx scratch1
= gen_reg_rtx (Pmode
);
23095 emit_move_insn (scratch1
, out
);
23096 /* Is there a known alignment and is it not 2? */
23099 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23100 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23102 /* Leave just the 3 lower bits. */
23103 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23104 NULL_RTX
, 0, OPTAB_WIDEN
);
23106 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23107 Pmode
, 1, align_4_label
);
23108 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23109 Pmode
, 1, align_2_label
);
23110 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23111 Pmode
, 1, align_3_label
);
23115 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23116 check if is aligned to 4 - byte. */
23118 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23119 NULL_RTX
, 0, OPTAB_WIDEN
);
23121 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23122 Pmode
, 1, align_4_label
);
23125 mem
= change_address (src
, QImode
, out
);
23127 /* Now compare the bytes. */
23129 /* Compare the first n unaligned byte on a byte per byte basis. */
23130 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23131 QImode
, 1, end_0_label
);
23133 /* Increment the address. */
23134 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23136 /* Not needed with an alignment of 2 */
23139 emit_label (align_2_label
);
23141 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23144 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23146 emit_label (align_3_label
);
23149 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23152 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23155 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23156 align this loop. It gives only huge programs, but does not help to
23158 emit_label (align_4_label
);
23160 mem
= change_address (src
, SImode
, out
);
23161 emit_move_insn (scratch
, mem
);
23162 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23164 /* This formula yields a nonzero result iff one of the bytes is zero.
23165 This saves three branches inside loop and many cycles. */
23167 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23168 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23169 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23170 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23171 gen_int_mode (0x80808080, SImode
)));
23172 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23177 rtx reg
= gen_reg_rtx (SImode
);
23178 rtx reg2
= gen_reg_rtx (Pmode
);
23179 emit_move_insn (reg
, tmpreg
);
23180 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23182 /* If zero is not in the first two bytes, move two bytes forward. */
23183 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23184 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23185 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23186 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23187 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23190 /* Emit lea manually to avoid clobbering of flags. */
23191 emit_insn (gen_rtx_SET (SImode
, reg2
,
23192 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23194 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23195 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23196 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23197 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23203 rtx end_2_label
= gen_label_rtx ();
23204 /* Is zero in the first two bytes? */
23206 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23207 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23208 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23209 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23210 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23212 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23213 JUMP_LABEL (tmp
) = end_2_label
;
23215 /* Not in the first two. Move two bytes forward. */
23216 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23217 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23219 emit_label (end_2_label
);
23223 /* Avoid branch in fixing the byte. */
23224 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23225 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23226 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23227 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23228 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23230 emit_label (end_0_label
);
23233 /* Expand strlen. */
23236 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23238 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23240 /* The generic case of strlen expander is long. Avoid it's
23241 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23243 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23244 && !TARGET_INLINE_ALL_STRINGOPS
23245 && !optimize_insn_for_size_p ()
23246 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23249 addr
= force_reg (Pmode
, XEXP (src
, 0));
23250 scratch1
= gen_reg_rtx (Pmode
);
23252 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23253 && !optimize_insn_for_size_p ())
23255 /* Well it seems that some optimizer does not combine a call like
23256 foo(strlen(bar), strlen(bar));
23257 when the move and the subtraction is done here. It does calculate
23258 the length just once when these instructions are done inside of
23259 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23260 often used and I use one fewer register for the lifetime of
23261 output_strlen_unroll() this is better. */
23263 emit_move_insn (out
, addr
);
23265 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23267 /* strlensi_unroll_1 returns the address of the zero at the end of
23268 the string, like memchr(), so compute the length by subtracting
23269 the start address. */
23270 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23276 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23277 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23280 scratch2
= gen_reg_rtx (Pmode
);
23281 scratch3
= gen_reg_rtx (Pmode
);
23282 scratch4
= force_reg (Pmode
, constm1_rtx
);
23284 emit_move_insn (scratch3
, addr
);
23285 eoschar
= force_reg (QImode
, eoschar
);
23287 src
= replace_equiv_address_nv (src
, scratch3
);
23289 /* If .md starts supporting :P, this can be done in .md. */
23290 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23291 scratch4
), UNSPEC_SCAS
);
23292 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23293 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23294 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23299 /* For given symbol (function) construct code to compute address of it's PLT
23300 entry in large x86-64 PIC model. */
23302 construct_plt_address (rtx symbol
)
23306 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23307 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23308 gcc_assert (Pmode
== DImode
);
23310 tmp
= gen_reg_rtx (Pmode
);
23311 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23313 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23314 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23319 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23321 rtx pop
, bool sibcall
)
23323 /* We need to represent that SI and DI registers are clobbered
23325 static int clobbered_registers
[] = {
23326 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23327 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23328 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23329 XMM15_REG
, SI_REG
, DI_REG
23331 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23332 rtx use
= NULL
, call
;
23333 unsigned int vec_len
;
23335 if (pop
== const0_rtx
)
23337 gcc_assert (!TARGET_64BIT
|| !pop
);
23339 if (TARGET_MACHO
&& !TARGET_64BIT
)
23342 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23343 fnaddr
= machopic_indirect_call_target (fnaddr
);
23348 /* Static functions and indirect calls don't need the pic register. */
23349 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23350 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23351 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23352 use_reg (&use
, pic_offset_table_rtx
);
23355 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23357 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23358 emit_move_insn (al
, callarg2
);
23359 use_reg (&use
, al
);
23362 if (ix86_cmodel
== CM_LARGE_PIC
23364 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23365 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23366 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23368 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23369 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23371 fnaddr
= XEXP (fnaddr
, 0);
23372 if (GET_MODE (fnaddr
) != word_mode
)
23373 fnaddr
= convert_to_mode (word_mode
, fnaddr
, 1);
23374 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23378 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23380 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23381 vec
[vec_len
++] = call
;
23385 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23386 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23387 vec
[vec_len
++] = pop
;
23390 if (TARGET_64BIT_MS_ABI
23391 && (!callarg2
|| INTVAL (callarg2
) != -2))
23395 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23396 UNSPEC_MS_TO_SYSV_CALL
);
23398 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23400 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
23402 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23404 clobbered_registers
[i
]));
23407 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23408 if (TARGET_VZEROUPPER
)
23411 if (cfun
->machine
->callee_pass_avx256_p
)
23413 if (cfun
->machine
->callee_return_avx256_p
)
23414 avx256
= callee_return_pass_avx256
;
23416 avx256
= callee_pass_avx256
;
23418 else if (cfun
->machine
->callee_return_avx256_p
)
23419 avx256
= callee_return_avx256
;
23421 avx256
= call_no_avx256
;
23423 if (reload_completed
)
23424 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
23426 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
,
23427 gen_rtvec (1, GEN_INT (avx256
)),
23428 UNSPEC_CALL_NEEDS_VZEROUPPER
);
23432 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23433 call
= emit_call_insn (call
);
23435 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23441 ix86_split_call_vzeroupper (rtx insn
, rtx vzeroupper
)
23443 rtx pat
= PATTERN (insn
);
23444 rtvec vec
= XVEC (pat
, 0);
23445 int len
= GET_NUM_ELEM (vec
) - 1;
23447 /* Strip off the last entry of the parallel. */
23448 gcc_assert (GET_CODE (RTVEC_ELT (vec
, len
)) == UNSPEC
);
23449 gcc_assert (XINT (RTVEC_ELT (vec
, len
), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER
);
23451 pat
= RTVEC_ELT (vec
, 0);
23453 pat
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (len
, &RTVEC_ELT (vec
, 0)));
23455 emit_insn (gen_avx_vzeroupper (vzeroupper
));
23456 emit_call_insn (pat
);
23459 /* Output the assembly for a call instruction. */
23462 ix86_output_call_insn (rtx insn
, rtx call_op
)
23464 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23465 bool seh_nop_p
= false;
23468 if (SIBLING_CALL_P (insn
))
23472 /* SEH epilogue detection requires the indirect branch case
23473 to include REX.W. */
23474 else if (TARGET_SEH
)
23475 xasm
= "rex.W jmp %A0";
23479 output_asm_insn (xasm
, &call_op
);
23483 /* SEH unwinding can require an extra nop to be emitted in several
23484 circumstances. Determine if we have one of those. */
23489 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23491 /* If we get to another real insn, we don't need the nop. */
23495 /* If we get to the epilogue note, prevent a catch region from
23496 being adjacent to the standard epilogue sequence. If non-
23497 call-exceptions, we'll have done this during epilogue emission. */
23498 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23499 && !flag_non_call_exceptions
23500 && !can_throw_internal (insn
))
23507 /* If we didn't find a real insn following the call, prevent the
23508 unwinder from looking into the next function. */
23514 xasm
= "call\t%P0";
23516 xasm
= "call\t%A0";
23518 output_asm_insn (xasm
, &call_op
);
23526 /* Clear stack slot assignments remembered from previous functions.
23527 This is called from INIT_EXPANDERS once before RTL is emitted for each
23530 static struct machine_function
*
23531 ix86_init_machine_status (void)
23533 struct machine_function
*f
;
23535 f
= ggc_alloc_cleared_machine_function ();
23536 f
->use_fast_prologue_epilogue_nregs
= -1;
23537 f
->tls_descriptor_call_expanded_p
= 0;
23538 f
->call_abi
= ix86_abi
;
23543 /* Return a MEM corresponding to a stack slot with mode MODE.
23544 Allocate a new slot if necessary.
23546 The RTL for a function can have several slots available: N is
23547 which slot to use. */
23550 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23552 struct stack_local_entry
*s
;
23554 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23556 /* Virtual slot is valid only before vregs are instantiated. */
23557 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
23559 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23560 if (s
->mode
== mode
&& s
->n
== n
)
23561 return validize_mem (copy_rtx (s
->rtl
));
23563 s
= ggc_alloc_stack_local_entry ();
23566 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23568 s
->next
= ix86_stack_locals
;
23569 ix86_stack_locals
= s
;
23570 return validize_mem (s
->rtl
);
23573 /* Calculate the length of the memory address in the instruction encoding.
23574 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23575 or other prefixes. */
23578 memory_address_length (rtx addr
)
23580 struct ix86_address parts
;
23581 rtx base
, index
, disp
;
23585 if (GET_CODE (addr
) == PRE_DEC
23586 || GET_CODE (addr
) == POST_INC
23587 || GET_CODE (addr
) == PRE_MODIFY
23588 || GET_CODE (addr
) == POST_MODIFY
)
23591 ok
= ix86_decompose_address (addr
, &parts
);
23594 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
23595 parts
.base
= SUBREG_REG (parts
.base
);
23596 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
23597 parts
.index
= SUBREG_REG (parts
.index
);
23600 index
= parts
.index
;
23603 /* Add length of addr32 prefix. */
23604 len
= (GET_CODE (addr
) == ZERO_EXTEND
23605 || GET_CODE (addr
) == AND
);
23608 - esp as the base always wants an index,
23609 - ebp as the base always wants a displacement,
23610 - r12 as the base always wants an index,
23611 - r13 as the base always wants a displacement. */
23613 /* Register Indirect. */
23614 if (base
&& !index
&& !disp
)
23616 /* esp (for its index) and ebp (for its displacement) need
23617 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23620 && (addr
== arg_pointer_rtx
23621 || addr
== frame_pointer_rtx
23622 || REGNO (addr
) == SP_REG
23623 || REGNO (addr
) == BP_REG
23624 || REGNO (addr
) == R12_REG
23625 || REGNO (addr
) == R13_REG
))
23629 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23630 is not disp32, but disp32(%rip), so for disp32
23631 SIB byte is needed, unless print_operand_address
23632 optimizes it into disp32(%rip) or (%rip) is implied
23634 else if (disp
&& !base
&& !index
)
23641 if (GET_CODE (disp
) == CONST
)
23642 symbol
= XEXP (disp
, 0);
23643 if (GET_CODE (symbol
) == PLUS
23644 && CONST_INT_P (XEXP (symbol
, 1)))
23645 symbol
= XEXP (symbol
, 0);
23647 if (GET_CODE (symbol
) != LABEL_REF
23648 && (GET_CODE (symbol
) != SYMBOL_REF
23649 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23650 && (GET_CODE (symbol
) != UNSPEC
23651 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23652 && XINT (symbol
, 1) != UNSPEC_PCREL
23653 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23660 /* Find the length of the displacement constant. */
23663 if (base
&& satisfies_constraint_K (disp
))
23668 /* ebp always wants a displacement. Similarly r13. */
23669 else if (base
&& REG_P (base
)
23670 && (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23673 /* An index requires the two-byte modrm form.... */
23675 /* ...like esp (or r12), which always wants an index. */
23676 || base
== arg_pointer_rtx
23677 || base
== frame_pointer_rtx
23678 || (base
&& REG_P (base
)
23679 && (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23696 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23697 is set, expect that insn have 8bit immediate alternative. */
23699 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23703 extract_insn_cached (insn
);
23704 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23705 if (CONSTANT_P (recog_data
.operand
[i
]))
23707 enum attr_mode mode
= get_attr_mode (insn
);
23710 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23712 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23719 ival
= trunc_int_for_mode (ival
, HImode
);
23722 ival
= trunc_int_for_mode (ival
, SImode
);
23727 if (IN_RANGE (ival
, -128, 127))
23744 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
23749 fatal_insn ("unknown insn mode", insn
);
23754 /* Compute default value for "length_address" attribute. */
23756 ix86_attr_length_address_default (rtx insn
)
23760 if (get_attr_type (insn
) == TYPE_LEA
)
23762 rtx set
= PATTERN (insn
), addr
;
23764 if (GET_CODE (set
) == PARALLEL
)
23765 set
= XVECEXP (set
, 0, 0);
23767 gcc_assert (GET_CODE (set
) == SET
);
23769 addr
= SET_SRC (set
);
23770 if (TARGET_64BIT
&& get_attr_mode (insn
) == MODE_SI
)
23772 if (GET_CODE (addr
) == ZERO_EXTEND
)
23773 addr
= XEXP (addr
, 0);
23774 if (GET_CODE (addr
) == SUBREG
)
23775 addr
= SUBREG_REG (addr
);
23778 return memory_address_length (addr
);
23781 extract_insn_cached (insn
);
23782 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23783 if (MEM_P (recog_data
.operand
[i
]))
23785 constrain_operands_cached (reload_completed
);
23786 if (which_alternative
!= -1)
23788 const char *constraints
= recog_data
.constraints
[i
];
23789 int alt
= which_alternative
;
23791 while (*constraints
== '=' || *constraints
== '+')
23794 while (*constraints
++ != ',')
23796 /* Skip ignored operands. */
23797 if (*constraints
== 'X')
23800 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
23805 /* Compute default value for "length_vex" attribute. It includes
23806 2 or 3 byte VEX prefix and 1 opcode byte. */
23809 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
23813 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
23814 byte VEX prefix. */
23815 if (!has_0f_opcode
|| has_vex_w
)
23818 /* We can always use 2 byte VEX prefix in 32bit. */
23822 extract_insn_cached (insn
);
23824 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23825 if (REG_P (recog_data
.operand
[i
]))
23827 /* REX.W bit uses 3 byte VEX prefix. */
23828 if (GET_MODE (recog_data
.operand
[i
]) == DImode
23829 && GENERAL_REG_P (recog_data
.operand
[i
]))
23834 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23835 if (MEM_P (recog_data
.operand
[i
])
23836 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
23843 /* Return the maximum number of instructions a cpu can issue. */
23846 ix86_issue_rate (void)
23850 case PROCESSOR_PENTIUM
:
23851 case PROCESSOR_ATOM
:
23853 case PROCESSOR_BTVER2
:
23856 case PROCESSOR_PENTIUMPRO
:
23857 case PROCESSOR_PENTIUM4
:
23858 case PROCESSOR_CORE2_32
:
23859 case PROCESSOR_CORE2_64
:
23860 case PROCESSOR_COREI7_32
:
23861 case PROCESSOR_COREI7_64
:
23862 case PROCESSOR_ATHLON
:
23864 case PROCESSOR_AMDFAM10
:
23865 case PROCESSOR_NOCONA
:
23866 case PROCESSOR_GENERIC32
:
23867 case PROCESSOR_GENERIC64
:
23868 case PROCESSOR_BDVER1
:
23869 case PROCESSOR_BDVER2
:
23870 case PROCESSOR_BTVER1
:
23878 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23879 by DEP_INSN and nothing set by DEP_INSN. */
23882 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
23886 /* Simplify the test for uninteresting insns. */
23887 if (insn_type
!= TYPE_SETCC
23888 && insn_type
!= TYPE_ICMOV
23889 && insn_type
!= TYPE_FCMOV
23890 && insn_type
!= TYPE_IBR
)
23893 if ((set
= single_set (dep_insn
)) != 0)
23895 set
= SET_DEST (set
);
23898 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
23899 && XVECLEN (PATTERN (dep_insn
), 0) == 2
23900 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
23901 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
23903 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23904 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23909 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
23912 /* This test is true if the dependent insn reads the flags but
23913 not any other potentially set register. */
23914 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
23917 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
23923 /* Return true iff USE_INSN has a memory address with operands set by
23927 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
23930 extract_insn_cached (use_insn
);
23931 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23932 if (MEM_P (recog_data
.operand
[i
]))
23934 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
23935 return modified_in_p (addr
, set_insn
) != 0;
23941 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
23943 enum attr_type insn_type
, dep_insn_type
;
23944 enum attr_memory memory
;
23946 int dep_insn_code_number
;
23948 /* Anti and output dependencies have zero cost on all CPUs. */
23949 if (REG_NOTE_KIND (link
) != 0)
23952 dep_insn_code_number
= recog_memoized (dep_insn
);
23954 /* If we can't recognize the insns, we can't really do anything. */
23955 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
23958 insn_type
= get_attr_type (insn
);
23959 dep_insn_type
= get_attr_type (dep_insn
);
23963 case PROCESSOR_PENTIUM
:
23964 /* Address Generation Interlock adds a cycle of latency. */
23965 if (insn_type
== TYPE_LEA
)
23967 rtx addr
= PATTERN (insn
);
23969 if (GET_CODE (addr
) == PARALLEL
)
23970 addr
= XVECEXP (addr
, 0, 0);
23972 gcc_assert (GET_CODE (addr
) == SET
);
23974 addr
= SET_SRC (addr
);
23975 if (modified_in_p (addr
, dep_insn
))
23978 else if (ix86_agi_dependent (dep_insn
, insn
))
23981 /* ??? Compares pair with jump/setcc. */
23982 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
23985 /* Floating point stores require value to be ready one cycle earlier. */
23986 if (insn_type
== TYPE_FMOV
23987 && get_attr_memory (insn
) == MEMORY_STORE
23988 && !ix86_agi_dependent (dep_insn
, insn
))
23992 case PROCESSOR_PENTIUMPRO
:
23993 memory
= get_attr_memory (insn
);
23995 /* INT->FP conversion is expensive. */
23996 if (get_attr_fp_int_src (dep_insn
))
23999 /* There is one cycle extra latency between an FP op and a store. */
24000 if (insn_type
== TYPE_FMOV
24001 && (set
= single_set (dep_insn
)) != NULL_RTX
24002 && (set2
= single_set (insn
)) != NULL_RTX
24003 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24004 && MEM_P (SET_DEST (set2
)))
24007 /* Show ability of reorder buffer to hide latency of load by executing
24008 in parallel with previous instruction in case
24009 previous instruction is not needed to compute the address. */
24010 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24011 && !ix86_agi_dependent (dep_insn
, insn
))
24013 /* Claim moves to take one cycle, as core can issue one load
24014 at time and the next load can start cycle later. */
24015 if (dep_insn_type
== TYPE_IMOV
24016 || dep_insn_type
== TYPE_FMOV
)
24024 memory
= get_attr_memory (insn
);
24026 /* The esp dependency is resolved before the instruction is really
24028 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24029 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24032 /* INT->FP conversion is expensive. */
24033 if (get_attr_fp_int_src (dep_insn
))
24036 /* Show ability of reorder buffer to hide latency of load by executing
24037 in parallel with previous instruction in case
24038 previous instruction is not needed to compute the address. */
24039 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24040 && !ix86_agi_dependent (dep_insn
, insn
))
24042 /* Claim moves to take one cycle, as core can issue one load
24043 at time and the next load can start cycle later. */
24044 if (dep_insn_type
== TYPE_IMOV
24045 || dep_insn_type
== TYPE_FMOV
)
24054 case PROCESSOR_ATHLON
:
24056 case PROCESSOR_AMDFAM10
:
24057 case PROCESSOR_BDVER1
:
24058 case PROCESSOR_BDVER2
:
24059 case PROCESSOR_BTVER1
:
24060 case PROCESSOR_BTVER2
:
24061 case PROCESSOR_ATOM
:
24062 case PROCESSOR_GENERIC32
:
24063 case PROCESSOR_GENERIC64
:
24064 memory
= get_attr_memory (insn
);
24066 /* Show ability of reorder buffer to hide latency of load by executing
24067 in parallel with previous instruction in case
24068 previous instruction is not needed to compute the address. */
24069 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24070 && !ix86_agi_dependent (dep_insn
, insn
))
24072 enum attr_unit unit
= get_attr_unit (insn
);
24075 /* Because of the difference between the length of integer and
24076 floating unit pipeline preparation stages, the memory operands
24077 for floating point are cheaper.
24079 ??? For Athlon it the difference is most probably 2. */
24080 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24083 loadcost
= TARGET_ATHLON
? 2 : 0;
24085 if (cost
>= loadcost
)
24098 /* How many alternative schedules to try. This should be as wide as the
24099 scheduling freedom in the DFA, but no wider. Making this value too
24100 large results extra work for the scheduler. */
24103 ia32_multipass_dfa_lookahead (void)
24107 case PROCESSOR_PENTIUM
:
24110 case PROCESSOR_PENTIUMPRO
:
24114 case PROCESSOR_CORE2_32
:
24115 case PROCESSOR_CORE2_64
:
24116 case PROCESSOR_COREI7_32
:
24117 case PROCESSOR_COREI7_64
:
24118 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24119 as many instructions can be executed on a cycle, i.e.,
24120 issue_rate. I wonder why tuning for many CPUs does not do this. */
24121 return ix86_issue_rate ();
24128 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24129 execution. It is applied if
24130 (1) IMUL instruction is on the top of list;
24131 (2) There exists the only producer of independent IMUL instruction in
24133 (3) Put found producer on the top of ready list.
24134 Returns issue rate. */
24137 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24138 int clock_var ATTRIBUTE_UNUSED
)
24140 static int issue_rate
= -1;
24141 int n_ready
= *pn_ready
;
24142 rtx insn
, insn1
, insn2
;
24144 sd_iterator_def sd_it
;
24148 /* Set up issue rate. */
24149 issue_rate
= ix86_issue_rate();
24151 /* Do reodering for Atom only. */
24152 if (ix86_tune
!= PROCESSOR_ATOM
)
24154 /* Nothing to do if ready list contains only 1 instruction. */
24158 /* Check that IMUL instruction is on the top of ready list. */
24159 insn
= ready
[n_ready
- 1];
24160 if (!NONDEBUG_INSN_P (insn
))
24162 insn
= PATTERN (insn
);
24163 if (GET_CODE (insn
) == PARALLEL
)
24164 insn
= XVECEXP (insn
, 0, 0);
24165 if (GET_CODE (insn
) != SET
)
24167 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24168 && GET_MODE (SET_SRC (insn
)) == SImode
))
24171 /* Search for producer of independent IMUL instruction. */
24172 for (i
= n_ready
- 2; i
>= 0; i
--)
24175 if (!NONDEBUG_INSN_P (insn
))
24177 /* Skip IMUL instruction. */
24178 insn2
= PATTERN (insn
);
24179 if (GET_CODE (insn2
) == PARALLEL
)
24180 insn2
= XVECEXP (insn2
, 0, 0);
24181 if (GET_CODE (insn2
) == SET
24182 && GET_CODE (SET_SRC (insn2
)) == MULT
24183 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24186 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24189 con
= DEP_CON (dep
);
24190 if (!NONDEBUG_INSN_P (con
))
24192 insn1
= PATTERN (con
);
24193 if (GET_CODE (insn1
) == PARALLEL
)
24194 insn1
= XVECEXP (insn1
, 0, 0);
24196 if (GET_CODE (insn1
) == SET
24197 && GET_CODE (SET_SRC (insn1
)) == MULT
24198 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24200 sd_iterator_def sd_it1
;
24202 /* Check if there is no other dependee for IMUL. */
24204 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24207 pro
= DEP_PRO (dep1
);
24208 if (!NONDEBUG_INSN_P (pro
))
24221 return issue_rate
; /* Didn't find IMUL producer. */
24223 if (sched_verbose
> 1)
24224 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24225 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24227 /* Put IMUL producer (ready[index]) at the top of ready list. */
24228 insn1
= ready
[index
];
24229 for (i
= index
; i
< n_ready
- 1; i
++)
24230 ready
[i
] = ready
[i
+ 1];
24231 ready
[n_ready
- 1] = insn1
;
24238 /* Model decoder of Core 2/i7.
24239 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24240 track the instruction fetch block boundaries and make sure that long
24241 (9+ bytes) instructions are assigned to D0. */
24243 /* Maximum length of an insn that can be handled by
24244 a secondary decoder unit. '8' for Core 2/i7. */
24245 static int core2i7_secondary_decoder_max_insn_size
;
24247 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24248 '16' for Core 2/i7. */
24249 static int core2i7_ifetch_block_size
;
24251 /* Maximum number of instructions decoder can handle per cycle.
24252 '6' for Core 2/i7. */
24253 static int core2i7_ifetch_block_max_insns
;
24255 typedef struct ix86_first_cycle_multipass_data_
*
24256 ix86_first_cycle_multipass_data_t
;
24257 typedef const struct ix86_first_cycle_multipass_data_
*
24258 const_ix86_first_cycle_multipass_data_t
;
24260 /* A variable to store target state across calls to max_issue within
24262 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24263 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24265 /* Initialize DATA. */
24267 core2i7_first_cycle_multipass_init (void *_data
)
24269 ix86_first_cycle_multipass_data_t data
24270 = (ix86_first_cycle_multipass_data_t
) _data
;
24272 data
->ifetch_block_len
= 0;
24273 data
->ifetch_block_n_insns
= 0;
24274 data
->ready_try_change
= NULL
;
24275 data
->ready_try_change_size
= 0;
24278 /* Advancing the cycle; reset ifetch block counts. */
24280 core2i7_dfa_post_advance_cycle (void)
24282 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24284 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24286 data
->ifetch_block_len
= 0;
24287 data
->ifetch_block_n_insns
= 0;
24290 static int min_insn_size (rtx
);
24292 /* Filter out insns from ready_try that the core will not be able to issue
24293 on current cycle due to decoder. */
24295 core2i7_first_cycle_multipass_filter_ready_try
24296 (const_ix86_first_cycle_multipass_data_t data
,
24297 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24304 if (ready_try
[n_ready
])
24307 insn
= get_ready_element (n_ready
);
24308 insn_size
= min_insn_size (insn
);
24310 if (/* If this is a too long an insn for a secondary decoder ... */
24311 (!first_cycle_insn_p
24312 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24313 /* ... or it would not fit into the ifetch block ... */
24314 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24315 /* ... or the decoder is full already ... */
24316 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24317 /* ... mask the insn out. */
24319 ready_try
[n_ready
] = 1;
24321 if (data
->ready_try_change
)
24322 SET_BIT (data
->ready_try_change
, n_ready
);
24327 /* Prepare for a new round of multipass lookahead scheduling. */
24329 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24330 bool first_cycle_insn_p
)
24332 ix86_first_cycle_multipass_data_t data
24333 = (ix86_first_cycle_multipass_data_t
) _data
;
24334 const_ix86_first_cycle_multipass_data_t prev_data
24335 = ix86_first_cycle_multipass_data
;
24337 /* Restore the state from the end of the previous round. */
24338 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24339 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24341 /* Filter instructions that cannot be issued on current cycle due to
24342 decoder restrictions. */
24343 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24344 first_cycle_insn_p
);
24347 /* INSN is being issued in current solution. Account for its impact on
24348 the decoder model. */
24350 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24351 rtx insn
, const void *_prev_data
)
24353 ix86_first_cycle_multipass_data_t data
24354 = (ix86_first_cycle_multipass_data_t
) _data
;
24355 const_ix86_first_cycle_multipass_data_t prev_data
24356 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24358 int insn_size
= min_insn_size (insn
);
24360 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24361 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24362 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24363 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24365 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24366 if (!data
->ready_try_change
)
24368 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24369 data
->ready_try_change_size
= n_ready
;
24371 else if (data
->ready_try_change_size
< n_ready
)
24373 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24375 data
->ready_try_change_size
= n_ready
;
24377 sbitmap_zero (data
->ready_try_change
);
24379 /* Filter out insns from ready_try that the core will not be able to issue
24380 on current cycle due to decoder. */
24381 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24385 /* Revert the effect on ready_try. */
24387 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24389 int n_ready ATTRIBUTE_UNUSED
)
24391 const_ix86_first_cycle_multipass_data_t data
24392 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24393 unsigned int i
= 0;
24394 sbitmap_iterator sbi
;
24396 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24397 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
24403 /* Save the result of multipass lookahead scheduling for the next round. */
24405 core2i7_first_cycle_multipass_end (const void *_data
)
24407 const_ix86_first_cycle_multipass_data_t data
24408 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24409 ix86_first_cycle_multipass_data_t next_data
24410 = ix86_first_cycle_multipass_data
;
24414 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24415 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24419 /* Deallocate target data. */
24421 core2i7_first_cycle_multipass_fini (void *_data
)
24423 ix86_first_cycle_multipass_data_t data
24424 = (ix86_first_cycle_multipass_data_t
) _data
;
24426 if (data
->ready_try_change
)
24428 sbitmap_free (data
->ready_try_change
);
24429 data
->ready_try_change
= NULL
;
24430 data
->ready_try_change_size
= 0;
24434 /* Prepare for scheduling pass. */
24436 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24437 int verbose ATTRIBUTE_UNUSED
,
24438 int max_uid ATTRIBUTE_UNUSED
)
24440 /* Install scheduling hooks for current CPU. Some of these hooks are used
24441 in time-critical parts of the scheduler, so we only set them up when
24442 they are actually used. */
24445 case PROCESSOR_CORE2_32
:
24446 case PROCESSOR_CORE2_64
:
24447 case PROCESSOR_COREI7_32
:
24448 case PROCESSOR_COREI7_64
:
24449 targetm
.sched
.dfa_post_advance_cycle
24450 = core2i7_dfa_post_advance_cycle
;
24451 targetm
.sched
.first_cycle_multipass_init
24452 = core2i7_first_cycle_multipass_init
;
24453 targetm
.sched
.first_cycle_multipass_begin
24454 = core2i7_first_cycle_multipass_begin
;
24455 targetm
.sched
.first_cycle_multipass_issue
24456 = core2i7_first_cycle_multipass_issue
;
24457 targetm
.sched
.first_cycle_multipass_backtrack
24458 = core2i7_first_cycle_multipass_backtrack
;
24459 targetm
.sched
.first_cycle_multipass_end
24460 = core2i7_first_cycle_multipass_end
;
24461 targetm
.sched
.first_cycle_multipass_fini
24462 = core2i7_first_cycle_multipass_fini
;
24464 /* Set decoder parameters. */
24465 core2i7_secondary_decoder_max_insn_size
= 8;
24466 core2i7_ifetch_block_size
= 16;
24467 core2i7_ifetch_block_max_insns
= 6;
24471 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24472 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24473 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24474 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24475 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24476 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24477 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24483 /* Compute the alignment given to a constant that is being placed in memory.
24484 EXP is the constant and ALIGN is the alignment that the object would
24486 The value of this function is used instead of that alignment to align
24490 ix86_constant_alignment (tree exp
, int align
)
24492 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24493 || TREE_CODE (exp
) == INTEGER_CST
)
24495 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24497 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24500 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24501 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24502 return BITS_PER_WORD
;
24507 /* Compute the alignment for a static variable.
24508 TYPE is the data type, and ALIGN is the alignment that
24509 the object would ordinarily have. The value of this function is used
24510 instead of that alignment to align the object. */
24513 ix86_data_alignment (tree type
, int align
)
24515 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24517 if (AGGREGATE_TYPE_P (type
)
24518 && TYPE_SIZE (type
)
24519 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24520 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24521 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24522 && align
< max_align
)
24525 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24526 to 16byte boundary. */
24529 if (AGGREGATE_TYPE_P (type
)
24530 && TYPE_SIZE (type
)
24531 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24532 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24533 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24537 if (TREE_CODE (type
) == ARRAY_TYPE
)
24539 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24541 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24544 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24547 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24549 if ((TYPE_MODE (type
) == XCmode
24550 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24553 else if ((TREE_CODE (type
) == RECORD_TYPE
24554 || TREE_CODE (type
) == UNION_TYPE
24555 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24556 && TYPE_FIELDS (type
))
24558 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24560 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24563 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24564 || TREE_CODE (type
) == INTEGER_TYPE
)
24566 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24568 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24575 /* Compute the alignment for a local variable or a stack slot. EXP is
24576 the data type or decl itself, MODE is the widest mode available and
24577 ALIGN is the alignment that the object would ordinarily have. The
24578 value of this macro is used instead of that alignment to align the
24582 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24583 unsigned int align
)
24587 if (exp
&& DECL_P (exp
))
24589 type
= TREE_TYPE (exp
);
24598 /* Don't do dynamic stack realignment for long long objects with
24599 -mpreferred-stack-boundary=2. */
24602 && ix86_preferred_stack_boundary
< 64
24603 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24604 && (!type
|| !TYPE_USER_ALIGN (type
))
24605 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24608 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24609 register in MODE. We will return the largest alignment of XF
24613 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
24614 align
= GET_MODE_ALIGNMENT (DFmode
);
24618 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24619 to 16byte boundary. Exact wording is:
24621 An array uses the same alignment as its elements, except that a local or
24622 global array variable of length at least 16 bytes or
24623 a C99 variable-length array variable always has alignment of at least 16 bytes.
24625 This was added to allow use of aligned SSE instructions at arrays. This
24626 rule is meant for static storage (where compiler can not do the analysis
24627 by itself). We follow it for automatic variables only when convenient.
24628 We fully control everything in the function compiled and functions from
24629 other unit can not rely on the alignment.
24631 Exclude va_list type. It is the common case of local array where
24632 we can not benefit from the alignment. */
24633 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
24636 if (AGGREGATE_TYPE_P (type
)
24637 && (va_list_type_node
== NULL_TREE
24638 || (TYPE_MAIN_VARIANT (type
)
24639 != TYPE_MAIN_VARIANT (va_list_type_node
)))
24640 && TYPE_SIZE (type
)
24641 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24642 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
24643 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24646 if (TREE_CODE (type
) == ARRAY_TYPE
)
24648 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24650 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24653 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24655 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24657 if ((TYPE_MODE (type
) == XCmode
24658 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24661 else if ((TREE_CODE (type
) == RECORD_TYPE
24662 || TREE_CODE (type
) == UNION_TYPE
24663 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24664 && TYPE_FIELDS (type
))
24666 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24668 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24671 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24672 || TREE_CODE (type
) == INTEGER_TYPE
)
24675 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24677 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24683 /* Compute the minimum required alignment for dynamic stack realignment
24684 purposes for a local variable, parameter or a stack slot. EXP is
24685 the data type or decl itself, MODE is its mode and ALIGN is the
24686 alignment that the object would ordinarily have. */
24689 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
24690 unsigned int align
)
24694 if (exp
&& DECL_P (exp
))
24696 type
= TREE_TYPE (exp
);
24705 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
24708 /* Don't do dynamic stack realignment for long long objects with
24709 -mpreferred-stack-boundary=2. */
24710 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24711 && (!type
|| !TYPE_USER_ALIGN (type
))
24712 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24718 /* Find a location for the static chain incoming to a nested function.
24719 This is a register, unless all free registers are used by arguments. */
24722 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
24726 if (!DECL_STATIC_CHAIN (fndecl
))
24731 /* We always use R10 in 64-bit mode. */
24739 /* By default in 32-bit mode we use ECX to pass the static chain. */
24742 fntype
= TREE_TYPE (fndecl
);
24743 ccvt
= ix86_get_callcvt (fntype
);
24744 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
24746 /* Fastcall functions use ecx/edx for arguments, which leaves
24747 us with EAX for the static chain.
24748 Thiscall functions use ecx for arguments, which also
24749 leaves us with EAX for the static chain. */
24752 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
24754 /* For regparm 3, we have no free call-clobbered registers in
24755 which to store the static chain. In order to implement this,
24756 we have the trampoline push the static chain to the stack.
24757 However, we can't push a value below the return address when
24758 we call the nested function directly, so we have to use an
24759 alternate entry point. For this we use ESI, and have the
24760 alternate entry point push ESI, so that things appear the
24761 same once we're executing the nested function. */
24764 if (fndecl
== current_function_decl
)
24765 ix86_static_chain_on_stack
= true;
24766 return gen_frame_mem (SImode
,
24767 plus_constant (Pmode
,
24768 arg_pointer_rtx
, -8));
24774 return gen_rtx_REG (Pmode
, regno
);
24777 /* Emit RTL insns to initialize the variable parts of a trampoline.
24778 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24779 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24780 to be passed to the target function. */
24783 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
24789 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
24795 /* Load the function address to r11. Try to load address using
24796 the shorter movl instead of movabs. We may want to support
24797 movq for kernel mode, but kernel does not use trampolines at
24798 the moment. FNADDR is a 32bit address and may not be in
24799 DImode when ptr_mode == SImode. Always use movl in this
24801 if (ptr_mode
== SImode
24802 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
24804 fnaddr
= copy_addr_to_reg (fnaddr
);
24806 mem
= adjust_address (m_tramp
, HImode
, offset
);
24807 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
24809 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
24810 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
24815 mem
= adjust_address (m_tramp
, HImode
, offset
);
24816 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
24818 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
24819 emit_move_insn (mem
, fnaddr
);
24823 /* Load static chain using movabs to r10. Use the shorter movl
24824 instead of movabs when ptr_mode == SImode. */
24825 if (ptr_mode
== SImode
)
24836 mem
= adjust_address (m_tramp
, HImode
, offset
);
24837 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
24839 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
24840 emit_move_insn (mem
, chain_value
);
24843 /* Jump to r11; the last (unused) byte is a nop, only there to
24844 pad the write out to a single 32-bit store. */
24845 mem
= adjust_address (m_tramp
, SImode
, offset
);
24846 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
24853 /* Depending on the static chain location, either load a register
24854 with a constant, or push the constant to the stack. All of the
24855 instructions are the same size. */
24856 chain
= ix86_static_chain (fndecl
, true);
24859 switch (REGNO (chain
))
24862 opcode
= 0xb8; break;
24864 opcode
= 0xb9; break;
24866 gcc_unreachable ();
24872 mem
= adjust_address (m_tramp
, QImode
, offset
);
24873 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
24875 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24876 emit_move_insn (mem
, chain_value
);
24879 mem
= adjust_address (m_tramp
, QImode
, offset
);
24880 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
24882 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24884 /* Compute offset from the end of the jmp to the target function.
24885 In the case in which the trampoline stores the static chain on
24886 the stack, we need to skip the first insn which pushes the
24887 (call-saved) register static chain; this push is 1 byte. */
24889 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
24890 plus_constant (Pmode
, XEXP (m_tramp
, 0),
24891 offset
- (MEM_P (chain
) ? 1 : 0)),
24892 NULL_RTX
, 1, OPTAB_DIRECT
);
24893 emit_move_insn (mem
, disp
);
24896 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
24898 #ifdef HAVE_ENABLE_EXECUTE_STACK
24899 #ifdef CHECK_EXECUTE_STACK_ENABLED
24900 if (CHECK_EXECUTE_STACK_ENABLED
)
24902 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
24903 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
24907 /* The following file contains several enumerations and data structures
24908 built from the definitions in i386-builtin-types.def. */
24910 #include "i386-builtin-types.inc"
24912 /* Table for the ix86 builtin non-function types. */
24913 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
24915 /* Retrieve an element from the above table, building some of
24916 the types lazily. */
24919 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
24921 unsigned int index
;
24924 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
24926 type
= ix86_builtin_type_tab
[(int) tcode
];
24930 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
24931 if (tcode
<= IX86_BT_LAST_VECT
)
24933 enum machine_mode mode
;
24935 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
24936 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
24937 mode
= ix86_builtin_type_vect_mode
[index
];
24939 type
= build_vector_type_for_mode (itype
, mode
);
24945 index
= tcode
- IX86_BT_LAST_VECT
- 1;
24946 if (tcode
<= IX86_BT_LAST_PTR
)
24947 quals
= TYPE_UNQUALIFIED
;
24949 quals
= TYPE_QUAL_CONST
;
24951 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
24952 if (quals
!= TYPE_UNQUALIFIED
)
24953 itype
= build_qualified_type (itype
, quals
);
24955 type
= build_pointer_type (itype
);
24958 ix86_builtin_type_tab
[(int) tcode
] = type
;
24962 /* Table for the ix86 builtin function types. */
24963 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
24965 /* Retrieve an element from the above table, building some of
24966 the types lazily. */
24969 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
24973 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
24975 type
= ix86_builtin_func_type_tab
[(int) tcode
];
24979 if (tcode
<= IX86_BT_LAST_FUNC
)
24981 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
24982 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
24983 tree rtype
, atype
, args
= void_list_node
;
24986 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
24987 for (i
= after
- 1; i
> start
; --i
)
24989 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
24990 args
= tree_cons (NULL
, atype
, args
);
24993 type
= build_function_type (rtype
, args
);
24997 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
24998 enum ix86_builtin_func_type icode
;
25000 icode
= ix86_builtin_func_alias_base
[index
];
25001 type
= ix86_get_builtin_func_type (icode
);
25004 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25009 /* Codes for all the SSE/MMX builtins. */
25012 IX86_BUILTIN_ADDPS
,
25013 IX86_BUILTIN_ADDSS
,
25014 IX86_BUILTIN_DIVPS
,
25015 IX86_BUILTIN_DIVSS
,
25016 IX86_BUILTIN_MULPS
,
25017 IX86_BUILTIN_MULSS
,
25018 IX86_BUILTIN_SUBPS
,
25019 IX86_BUILTIN_SUBSS
,
25021 IX86_BUILTIN_CMPEQPS
,
25022 IX86_BUILTIN_CMPLTPS
,
25023 IX86_BUILTIN_CMPLEPS
,
25024 IX86_BUILTIN_CMPGTPS
,
25025 IX86_BUILTIN_CMPGEPS
,
25026 IX86_BUILTIN_CMPNEQPS
,
25027 IX86_BUILTIN_CMPNLTPS
,
25028 IX86_BUILTIN_CMPNLEPS
,
25029 IX86_BUILTIN_CMPNGTPS
,
25030 IX86_BUILTIN_CMPNGEPS
,
25031 IX86_BUILTIN_CMPORDPS
,
25032 IX86_BUILTIN_CMPUNORDPS
,
25033 IX86_BUILTIN_CMPEQSS
,
25034 IX86_BUILTIN_CMPLTSS
,
25035 IX86_BUILTIN_CMPLESS
,
25036 IX86_BUILTIN_CMPNEQSS
,
25037 IX86_BUILTIN_CMPNLTSS
,
25038 IX86_BUILTIN_CMPNLESS
,
25039 IX86_BUILTIN_CMPNGTSS
,
25040 IX86_BUILTIN_CMPNGESS
,
25041 IX86_BUILTIN_CMPORDSS
,
25042 IX86_BUILTIN_CMPUNORDSS
,
25044 IX86_BUILTIN_COMIEQSS
,
25045 IX86_BUILTIN_COMILTSS
,
25046 IX86_BUILTIN_COMILESS
,
25047 IX86_BUILTIN_COMIGTSS
,
25048 IX86_BUILTIN_COMIGESS
,
25049 IX86_BUILTIN_COMINEQSS
,
25050 IX86_BUILTIN_UCOMIEQSS
,
25051 IX86_BUILTIN_UCOMILTSS
,
25052 IX86_BUILTIN_UCOMILESS
,
25053 IX86_BUILTIN_UCOMIGTSS
,
25054 IX86_BUILTIN_UCOMIGESS
,
25055 IX86_BUILTIN_UCOMINEQSS
,
25057 IX86_BUILTIN_CVTPI2PS
,
25058 IX86_BUILTIN_CVTPS2PI
,
25059 IX86_BUILTIN_CVTSI2SS
,
25060 IX86_BUILTIN_CVTSI642SS
,
25061 IX86_BUILTIN_CVTSS2SI
,
25062 IX86_BUILTIN_CVTSS2SI64
,
25063 IX86_BUILTIN_CVTTPS2PI
,
25064 IX86_BUILTIN_CVTTSS2SI
,
25065 IX86_BUILTIN_CVTTSS2SI64
,
25067 IX86_BUILTIN_MAXPS
,
25068 IX86_BUILTIN_MAXSS
,
25069 IX86_BUILTIN_MINPS
,
25070 IX86_BUILTIN_MINSS
,
25072 IX86_BUILTIN_LOADUPS
,
25073 IX86_BUILTIN_STOREUPS
,
25074 IX86_BUILTIN_MOVSS
,
25076 IX86_BUILTIN_MOVHLPS
,
25077 IX86_BUILTIN_MOVLHPS
,
25078 IX86_BUILTIN_LOADHPS
,
25079 IX86_BUILTIN_LOADLPS
,
25080 IX86_BUILTIN_STOREHPS
,
25081 IX86_BUILTIN_STORELPS
,
25083 IX86_BUILTIN_MASKMOVQ
,
25084 IX86_BUILTIN_MOVMSKPS
,
25085 IX86_BUILTIN_PMOVMSKB
,
25087 IX86_BUILTIN_MOVNTPS
,
25088 IX86_BUILTIN_MOVNTQ
,
25090 IX86_BUILTIN_LOADDQU
,
25091 IX86_BUILTIN_STOREDQU
,
25093 IX86_BUILTIN_PACKSSWB
,
25094 IX86_BUILTIN_PACKSSDW
,
25095 IX86_BUILTIN_PACKUSWB
,
25097 IX86_BUILTIN_PADDB
,
25098 IX86_BUILTIN_PADDW
,
25099 IX86_BUILTIN_PADDD
,
25100 IX86_BUILTIN_PADDQ
,
25101 IX86_BUILTIN_PADDSB
,
25102 IX86_BUILTIN_PADDSW
,
25103 IX86_BUILTIN_PADDUSB
,
25104 IX86_BUILTIN_PADDUSW
,
25105 IX86_BUILTIN_PSUBB
,
25106 IX86_BUILTIN_PSUBW
,
25107 IX86_BUILTIN_PSUBD
,
25108 IX86_BUILTIN_PSUBQ
,
25109 IX86_BUILTIN_PSUBSB
,
25110 IX86_BUILTIN_PSUBSW
,
25111 IX86_BUILTIN_PSUBUSB
,
25112 IX86_BUILTIN_PSUBUSW
,
25115 IX86_BUILTIN_PANDN
,
25119 IX86_BUILTIN_PAVGB
,
25120 IX86_BUILTIN_PAVGW
,
25122 IX86_BUILTIN_PCMPEQB
,
25123 IX86_BUILTIN_PCMPEQW
,
25124 IX86_BUILTIN_PCMPEQD
,
25125 IX86_BUILTIN_PCMPGTB
,
25126 IX86_BUILTIN_PCMPGTW
,
25127 IX86_BUILTIN_PCMPGTD
,
25129 IX86_BUILTIN_PMADDWD
,
25131 IX86_BUILTIN_PMAXSW
,
25132 IX86_BUILTIN_PMAXUB
,
25133 IX86_BUILTIN_PMINSW
,
25134 IX86_BUILTIN_PMINUB
,
25136 IX86_BUILTIN_PMULHUW
,
25137 IX86_BUILTIN_PMULHW
,
25138 IX86_BUILTIN_PMULLW
,
25140 IX86_BUILTIN_PSADBW
,
25141 IX86_BUILTIN_PSHUFW
,
25143 IX86_BUILTIN_PSLLW
,
25144 IX86_BUILTIN_PSLLD
,
25145 IX86_BUILTIN_PSLLQ
,
25146 IX86_BUILTIN_PSRAW
,
25147 IX86_BUILTIN_PSRAD
,
25148 IX86_BUILTIN_PSRLW
,
25149 IX86_BUILTIN_PSRLD
,
25150 IX86_BUILTIN_PSRLQ
,
25151 IX86_BUILTIN_PSLLWI
,
25152 IX86_BUILTIN_PSLLDI
,
25153 IX86_BUILTIN_PSLLQI
,
25154 IX86_BUILTIN_PSRAWI
,
25155 IX86_BUILTIN_PSRADI
,
25156 IX86_BUILTIN_PSRLWI
,
25157 IX86_BUILTIN_PSRLDI
,
25158 IX86_BUILTIN_PSRLQI
,
25160 IX86_BUILTIN_PUNPCKHBW
,
25161 IX86_BUILTIN_PUNPCKHWD
,
25162 IX86_BUILTIN_PUNPCKHDQ
,
25163 IX86_BUILTIN_PUNPCKLBW
,
25164 IX86_BUILTIN_PUNPCKLWD
,
25165 IX86_BUILTIN_PUNPCKLDQ
,
25167 IX86_BUILTIN_SHUFPS
,
25169 IX86_BUILTIN_RCPPS
,
25170 IX86_BUILTIN_RCPSS
,
25171 IX86_BUILTIN_RSQRTPS
,
25172 IX86_BUILTIN_RSQRTPS_NR
,
25173 IX86_BUILTIN_RSQRTSS
,
25174 IX86_BUILTIN_RSQRTF
,
25175 IX86_BUILTIN_SQRTPS
,
25176 IX86_BUILTIN_SQRTPS_NR
,
25177 IX86_BUILTIN_SQRTSS
,
25179 IX86_BUILTIN_UNPCKHPS
,
25180 IX86_BUILTIN_UNPCKLPS
,
25182 IX86_BUILTIN_ANDPS
,
25183 IX86_BUILTIN_ANDNPS
,
25185 IX86_BUILTIN_XORPS
,
25188 IX86_BUILTIN_LDMXCSR
,
25189 IX86_BUILTIN_STMXCSR
,
25190 IX86_BUILTIN_SFENCE
,
25192 /* 3DNow! Original */
25193 IX86_BUILTIN_FEMMS
,
25194 IX86_BUILTIN_PAVGUSB
,
25195 IX86_BUILTIN_PF2ID
,
25196 IX86_BUILTIN_PFACC
,
25197 IX86_BUILTIN_PFADD
,
25198 IX86_BUILTIN_PFCMPEQ
,
25199 IX86_BUILTIN_PFCMPGE
,
25200 IX86_BUILTIN_PFCMPGT
,
25201 IX86_BUILTIN_PFMAX
,
25202 IX86_BUILTIN_PFMIN
,
25203 IX86_BUILTIN_PFMUL
,
25204 IX86_BUILTIN_PFRCP
,
25205 IX86_BUILTIN_PFRCPIT1
,
25206 IX86_BUILTIN_PFRCPIT2
,
25207 IX86_BUILTIN_PFRSQIT1
,
25208 IX86_BUILTIN_PFRSQRT
,
25209 IX86_BUILTIN_PFSUB
,
25210 IX86_BUILTIN_PFSUBR
,
25211 IX86_BUILTIN_PI2FD
,
25212 IX86_BUILTIN_PMULHRW
,
25214 /* 3DNow! Athlon Extensions */
25215 IX86_BUILTIN_PF2IW
,
25216 IX86_BUILTIN_PFNACC
,
25217 IX86_BUILTIN_PFPNACC
,
25218 IX86_BUILTIN_PI2FW
,
25219 IX86_BUILTIN_PSWAPDSI
,
25220 IX86_BUILTIN_PSWAPDSF
,
25223 IX86_BUILTIN_ADDPD
,
25224 IX86_BUILTIN_ADDSD
,
25225 IX86_BUILTIN_DIVPD
,
25226 IX86_BUILTIN_DIVSD
,
25227 IX86_BUILTIN_MULPD
,
25228 IX86_BUILTIN_MULSD
,
25229 IX86_BUILTIN_SUBPD
,
25230 IX86_BUILTIN_SUBSD
,
25232 IX86_BUILTIN_CMPEQPD
,
25233 IX86_BUILTIN_CMPLTPD
,
25234 IX86_BUILTIN_CMPLEPD
,
25235 IX86_BUILTIN_CMPGTPD
,
25236 IX86_BUILTIN_CMPGEPD
,
25237 IX86_BUILTIN_CMPNEQPD
,
25238 IX86_BUILTIN_CMPNLTPD
,
25239 IX86_BUILTIN_CMPNLEPD
,
25240 IX86_BUILTIN_CMPNGTPD
,
25241 IX86_BUILTIN_CMPNGEPD
,
25242 IX86_BUILTIN_CMPORDPD
,
25243 IX86_BUILTIN_CMPUNORDPD
,
25244 IX86_BUILTIN_CMPEQSD
,
25245 IX86_BUILTIN_CMPLTSD
,
25246 IX86_BUILTIN_CMPLESD
,
25247 IX86_BUILTIN_CMPNEQSD
,
25248 IX86_BUILTIN_CMPNLTSD
,
25249 IX86_BUILTIN_CMPNLESD
,
25250 IX86_BUILTIN_CMPORDSD
,
25251 IX86_BUILTIN_CMPUNORDSD
,
25253 IX86_BUILTIN_COMIEQSD
,
25254 IX86_BUILTIN_COMILTSD
,
25255 IX86_BUILTIN_COMILESD
,
25256 IX86_BUILTIN_COMIGTSD
,
25257 IX86_BUILTIN_COMIGESD
,
25258 IX86_BUILTIN_COMINEQSD
,
25259 IX86_BUILTIN_UCOMIEQSD
,
25260 IX86_BUILTIN_UCOMILTSD
,
25261 IX86_BUILTIN_UCOMILESD
,
25262 IX86_BUILTIN_UCOMIGTSD
,
25263 IX86_BUILTIN_UCOMIGESD
,
25264 IX86_BUILTIN_UCOMINEQSD
,
25266 IX86_BUILTIN_MAXPD
,
25267 IX86_BUILTIN_MAXSD
,
25268 IX86_BUILTIN_MINPD
,
25269 IX86_BUILTIN_MINSD
,
25271 IX86_BUILTIN_ANDPD
,
25272 IX86_BUILTIN_ANDNPD
,
25274 IX86_BUILTIN_XORPD
,
25276 IX86_BUILTIN_SQRTPD
,
25277 IX86_BUILTIN_SQRTSD
,
25279 IX86_BUILTIN_UNPCKHPD
,
25280 IX86_BUILTIN_UNPCKLPD
,
25282 IX86_BUILTIN_SHUFPD
,
25284 IX86_BUILTIN_LOADUPD
,
25285 IX86_BUILTIN_STOREUPD
,
25286 IX86_BUILTIN_MOVSD
,
25288 IX86_BUILTIN_LOADHPD
,
25289 IX86_BUILTIN_LOADLPD
,
25291 IX86_BUILTIN_CVTDQ2PD
,
25292 IX86_BUILTIN_CVTDQ2PS
,
25294 IX86_BUILTIN_CVTPD2DQ
,
25295 IX86_BUILTIN_CVTPD2PI
,
25296 IX86_BUILTIN_CVTPD2PS
,
25297 IX86_BUILTIN_CVTTPD2DQ
,
25298 IX86_BUILTIN_CVTTPD2PI
,
25300 IX86_BUILTIN_CVTPI2PD
,
25301 IX86_BUILTIN_CVTSI2SD
,
25302 IX86_BUILTIN_CVTSI642SD
,
25304 IX86_BUILTIN_CVTSD2SI
,
25305 IX86_BUILTIN_CVTSD2SI64
,
25306 IX86_BUILTIN_CVTSD2SS
,
25307 IX86_BUILTIN_CVTSS2SD
,
25308 IX86_BUILTIN_CVTTSD2SI
,
25309 IX86_BUILTIN_CVTTSD2SI64
,
25311 IX86_BUILTIN_CVTPS2DQ
,
25312 IX86_BUILTIN_CVTPS2PD
,
25313 IX86_BUILTIN_CVTTPS2DQ
,
25315 IX86_BUILTIN_MOVNTI
,
25316 IX86_BUILTIN_MOVNTI64
,
25317 IX86_BUILTIN_MOVNTPD
,
25318 IX86_BUILTIN_MOVNTDQ
,
25320 IX86_BUILTIN_MOVQ128
,
25323 IX86_BUILTIN_MASKMOVDQU
,
25324 IX86_BUILTIN_MOVMSKPD
,
25325 IX86_BUILTIN_PMOVMSKB128
,
25327 IX86_BUILTIN_PACKSSWB128
,
25328 IX86_BUILTIN_PACKSSDW128
,
25329 IX86_BUILTIN_PACKUSWB128
,
25331 IX86_BUILTIN_PADDB128
,
25332 IX86_BUILTIN_PADDW128
,
25333 IX86_BUILTIN_PADDD128
,
25334 IX86_BUILTIN_PADDQ128
,
25335 IX86_BUILTIN_PADDSB128
,
25336 IX86_BUILTIN_PADDSW128
,
25337 IX86_BUILTIN_PADDUSB128
,
25338 IX86_BUILTIN_PADDUSW128
,
25339 IX86_BUILTIN_PSUBB128
,
25340 IX86_BUILTIN_PSUBW128
,
25341 IX86_BUILTIN_PSUBD128
,
25342 IX86_BUILTIN_PSUBQ128
,
25343 IX86_BUILTIN_PSUBSB128
,
25344 IX86_BUILTIN_PSUBSW128
,
25345 IX86_BUILTIN_PSUBUSB128
,
25346 IX86_BUILTIN_PSUBUSW128
,
25348 IX86_BUILTIN_PAND128
,
25349 IX86_BUILTIN_PANDN128
,
25350 IX86_BUILTIN_POR128
,
25351 IX86_BUILTIN_PXOR128
,
25353 IX86_BUILTIN_PAVGB128
,
25354 IX86_BUILTIN_PAVGW128
,
25356 IX86_BUILTIN_PCMPEQB128
,
25357 IX86_BUILTIN_PCMPEQW128
,
25358 IX86_BUILTIN_PCMPEQD128
,
25359 IX86_BUILTIN_PCMPGTB128
,
25360 IX86_BUILTIN_PCMPGTW128
,
25361 IX86_BUILTIN_PCMPGTD128
,
25363 IX86_BUILTIN_PMADDWD128
,
25365 IX86_BUILTIN_PMAXSW128
,
25366 IX86_BUILTIN_PMAXUB128
,
25367 IX86_BUILTIN_PMINSW128
,
25368 IX86_BUILTIN_PMINUB128
,
25370 IX86_BUILTIN_PMULUDQ
,
25371 IX86_BUILTIN_PMULUDQ128
,
25372 IX86_BUILTIN_PMULHUW128
,
25373 IX86_BUILTIN_PMULHW128
,
25374 IX86_BUILTIN_PMULLW128
,
25376 IX86_BUILTIN_PSADBW128
,
25377 IX86_BUILTIN_PSHUFHW
,
25378 IX86_BUILTIN_PSHUFLW
,
25379 IX86_BUILTIN_PSHUFD
,
25381 IX86_BUILTIN_PSLLDQI128
,
25382 IX86_BUILTIN_PSLLWI128
,
25383 IX86_BUILTIN_PSLLDI128
,
25384 IX86_BUILTIN_PSLLQI128
,
25385 IX86_BUILTIN_PSRAWI128
,
25386 IX86_BUILTIN_PSRADI128
,
25387 IX86_BUILTIN_PSRLDQI128
,
25388 IX86_BUILTIN_PSRLWI128
,
25389 IX86_BUILTIN_PSRLDI128
,
25390 IX86_BUILTIN_PSRLQI128
,
25392 IX86_BUILTIN_PSLLDQ128
,
25393 IX86_BUILTIN_PSLLW128
,
25394 IX86_BUILTIN_PSLLD128
,
25395 IX86_BUILTIN_PSLLQ128
,
25396 IX86_BUILTIN_PSRAW128
,
25397 IX86_BUILTIN_PSRAD128
,
25398 IX86_BUILTIN_PSRLW128
,
25399 IX86_BUILTIN_PSRLD128
,
25400 IX86_BUILTIN_PSRLQ128
,
25402 IX86_BUILTIN_PUNPCKHBW128
,
25403 IX86_BUILTIN_PUNPCKHWD128
,
25404 IX86_BUILTIN_PUNPCKHDQ128
,
25405 IX86_BUILTIN_PUNPCKHQDQ128
,
25406 IX86_BUILTIN_PUNPCKLBW128
,
25407 IX86_BUILTIN_PUNPCKLWD128
,
25408 IX86_BUILTIN_PUNPCKLDQ128
,
25409 IX86_BUILTIN_PUNPCKLQDQ128
,
25411 IX86_BUILTIN_CLFLUSH
,
25412 IX86_BUILTIN_MFENCE
,
25413 IX86_BUILTIN_LFENCE
,
25414 IX86_BUILTIN_PAUSE
,
25416 IX86_BUILTIN_BSRSI
,
25417 IX86_BUILTIN_BSRDI
,
25418 IX86_BUILTIN_RDPMC
,
25419 IX86_BUILTIN_RDTSC
,
25420 IX86_BUILTIN_RDTSCP
,
25421 IX86_BUILTIN_ROLQI
,
25422 IX86_BUILTIN_ROLHI
,
25423 IX86_BUILTIN_RORQI
,
25424 IX86_BUILTIN_RORHI
,
25427 IX86_BUILTIN_ADDSUBPS
,
25428 IX86_BUILTIN_HADDPS
,
25429 IX86_BUILTIN_HSUBPS
,
25430 IX86_BUILTIN_MOVSHDUP
,
25431 IX86_BUILTIN_MOVSLDUP
,
25432 IX86_BUILTIN_ADDSUBPD
,
25433 IX86_BUILTIN_HADDPD
,
25434 IX86_BUILTIN_HSUBPD
,
25435 IX86_BUILTIN_LDDQU
,
25437 IX86_BUILTIN_MONITOR
,
25438 IX86_BUILTIN_MWAIT
,
25441 IX86_BUILTIN_PHADDW
,
25442 IX86_BUILTIN_PHADDD
,
25443 IX86_BUILTIN_PHADDSW
,
25444 IX86_BUILTIN_PHSUBW
,
25445 IX86_BUILTIN_PHSUBD
,
25446 IX86_BUILTIN_PHSUBSW
,
25447 IX86_BUILTIN_PMADDUBSW
,
25448 IX86_BUILTIN_PMULHRSW
,
25449 IX86_BUILTIN_PSHUFB
,
25450 IX86_BUILTIN_PSIGNB
,
25451 IX86_BUILTIN_PSIGNW
,
25452 IX86_BUILTIN_PSIGND
,
25453 IX86_BUILTIN_PALIGNR
,
25454 IX86_BUILTIN_PABSB
,
25455 IX86_BUILTIN_PABSW
,
25456 IX86_BUILTIN_PABSD
,
25458 IX86_BUILTIN_PHADDW128
,
25459 IX86_BUILTIN_PHADDD128
,
25460 IX86_BUILTIN_PHADDSW128
,
25461 IX86_BUILTIN_PHSUBW128
,
25462 IX86_BUILTIN_PHSUBD128
,
25463 IX86_BUILTIN_PHSUBSW128
,
25464 IX86_BUILTIN_PMADDUBSW128
,
25465 IX86_BUILTIN_PMULHRSW128
,
25466 IX86_BUILTIN_PSHUFB128
,
25467 IX86_BUILTIN_PSIGNB128
,
25468 IX86_BUILTIN_PSIGNW128
,
25469 IX86_BUILTIN_PSIGND128
,
25470 IX86_BUILTIN_PALIGNR128
,
25471 IX86_BUILTIN_PABSB128
,
25472 IX86_BUILTIN_PABSW128
,
25473 IX86_BUILTIN_PABSD128
,
25475 /* AMDFAM10 - SSE4A New Instructions. */
25476 IX86_BUILTIN_MOVNTSD
,
25477 IX86_BUILTIN_MOVNTSS
,
25478 IX86_BUILTIN_EXTRQI
,
25479 IX86_BUILTIN_EXTRQ
,
25480 IX86_BUILTIN_INSERTQI
,
25481 IX86_BUILTIN_INSERTQ
,
25484 IX86_BUILTIN_BLENDPD
,
25485 IX86_BUILTIN_BLENDPS
,
25486 IX86_BUILTIN_BLENDVPD
,
25487 IX86_BUILTIN_BLENDVPS
,
25488 IX86_BUILTIN_PBLENDVB128
,
25489 IX86_BUILTIN_PBLENDW128
,
25494 IX86_BUILTIN_INSERTPS128
,
25496 IX86_BUILTIN_MOVNTDQA
,
25497 IX86_BUILTIN_MPSADBW128
,
25498 IX86_BUILTIN_PACKUSDW128
,
25499 IX86_BUILTIN_PCMPEQQ
,
25500 IX86_BUILTIN_PHMINPOSUW128
,
25502 IX86_BUILTIN_PMAXSB128
,
25503 IX86_BUILTIN_PMAXSD128
,
25504 IX86_BUILTIN_PMAXUD128
,
25505 IX86_BUILTIN_PMAXUW128
,
25507 IX86_BUILTIN_PMINSB128
,
25508 IX86_BUILTIN_PMINSD128
,
25509 IX86_BUILTIN_PMINUD128
,
25510 IX86_BUILTIN_PMINUW128
,
25512 IX86_BUILTIN_PMOVSXBW128
,
25513 IX86_BUILTIN_PMOVSXBD128
,
25514 IX86_BUILTIN_PMOVSXBQ128
,
25515 IX86_BUILTIN_PMOVSXWD128
,
25516 IX86_BUILTIN_PMOVSXWQ128
,
25517 IX86_BUILTIN_PMOVSXDQ128
,
25519 IX86_BUILTIN_PMOVZXBW128
,
25520 IX86_BUILTIN_PMOVZXBD128
,
25521 IX86_BUILTIN_PMOVZXBQ128
,
25522 IX86_BUILTIN_PMOVZXWD128
,
25523 IX86_BUILTIN_PMOVZXWQ128
,
25524 IX86_BUILTIN_PMOVZXDQ128
,
25526 IX86_BUILTIN_PMULDQ128
,
25527 IX86_BUILTIN_PMULLD128
,
25529 IX86_BUILTIN_ROUNDSD
,
25530 IX86_BUILTIN_ROUNDSS
,
25532 IX86_BUILTIN_ROUNDPD
,
25533 IX86_BUILTIN_ROUNDPS
,
25535 IX86_BUILTIN_FLOORPD
,
25536 IX86_BUILTIN_CEILPD
,
25537 IX86_BUILTIN_TRUNCPD
,
25538 IX86_BUILTIN_RINTPD
,
25539 IX86_BUILTIN_ROUNDPD_AZ
,
25541 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25542 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25543 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25545 IX86_BUILTIN_FLOORPS
,
25546 IX86_BUILTIN_CEILPS
,
25547 IX86_BUILTIN_TRUNCPS
,
25548 IX86_BUILTIN_RINTPS
,
25549 IX86_BUILTIN_ROUNDPS_AZ
,
25551 IX86_BUILTIN_FLOORPS_SFIX
,
25552 IX86_BUILTIN_CEILPS_SFIX
,
25553 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25555 IX86_BUILTIN_PTESTZ
,
25556 IX86_BUILTIN_PTESTC
,
25557 IX86_BUILTIN_PTESTNZC
,
25559 IX86_BUILTIN_VEC_INIT_V2SI
,
25560 IX86_BUILTIN_VEC_INIT_V4HI
,
25561 IX86_BUILTIN_VEC_INIT_V8QI
,
25562 IX86_BUILTIN_VEC_EXT_V2DF
,
25563 IX86_BUILTIN_VEC_EXT_V2DI
,
25564 IX86_BUILTIN_VEC_EXT_V4SF
,
25565 IX86_BUILTIN_VEC_EXT_V4SI
,
25566 IX86_BUILTIN_VEC_EXT_V8HI
,
25567 IX86_BUILTIN_VEC_EXT_V2SI
,
25568 IX86_BUILTIN_VEC_EXT_V4HI
,
25569 IX86_BUILTIN_VEC_EXT_V16QI
,
25570 IX86_BUILTIN_VEC_SET_V2DI
,
25571 IX86_BUILTIN_VEC_SET_V4SF
,
25572 IX86_BUILTIN_VEC_SET_V4SI
,
25573 IX86_BUILTIN_VEC_SET_V8HI
,
25574 IX86_BUILTIN_VEC_SET_V4HI
,
25575 IX86_BUILTIN_VEC_SET_V16QI
,
25577 IX86_BUILTIN_VEC_PACK_SFIX
,
25578 IX86_BUILTIN_VEC_PACK_SFIX256
,
25581 IX86_BUILTIN_CRC32QI
,
25582 IX86_BUILTIN_CRC32HI
,
25583 IX86_BUILTIN_CRC32SI
,
25584 IX86_BUILTIN_CRC32DI
,
25586 IX86_BUILTIN_PCMPESTRI128
,
25587 IX86_BUILTIN_PCMPESTRM128
,
25588 IX86_BUILTIN_PCMPESTRA128
,
25589 IX86_BUILTIN_PCMPESTRC128
,
25590 IX86_BUILTIN_PCMPESTRO128
,
25591 IX86_BUILTIN_PCMPESTRS128
,
25592 IX86_BUILTIN_PCMPESTRZ128
,
25593 IX86_BUILTIN_PCMPISTRI128
,
25594 IX86_BUILTIN_PCMPISTRM128
,
25595 IX86_BUILTIN_PCMPISTRA128
,
25596 IX86_BUILTIN_PCMPISTRC128
,
25597 IX86_BUILTIN_PCMPISTRO128
,
25598 IX86_BUILTIN_PCMPISTRS128
,
25599 IX86_BUILTIN_PCMPISTRZ128
,
25601 IX86_BUILTIN_PCMPGTQ
,
25603 /* AES instructions */
25604 IX86_BUILTIN_AESENC128
,
25605 IX86_BUILTIN_AESENCLAST128
,
25606 IX86_BUILTIN_AESDEC128
,
25607 IX86_BUILTIN_AESDECLAST128
,
25608 IX86_BUILTIN_AESIMC128
,
25609 IX86_BUILTIN_AESKEYGENASSIST128
,
25611 /* PCLMUL instruction */
25612 IX86_BUILTIN_PCLMULQDQ128
,
25615 IX86_BUILTIN_ADDPD256
,
25616 IX86_BUILTIN_ADDPS256
,
25617 IX86_BUILTIN_ADDSUBPD256
,
25618 IX86_BUILTIN_ADDSUBPS256
,
25619 IX86_BUILTIN_ANDPD256
,
25620 IX86_BUILTIN_ANDPS256
,
25621 IX86_BUILTIN_ANDNPD256
,
25622 IX86_BUILTIN_ANDNPS256
,
25623 IX86_BUILTIN_BLENDPD256
,
25624 IX86_BUILTIN_BLENDPS256
,
25625 IX86_BUILTIN_BLENDVPD256
,
25626 IX86_BUILTIN_BLENDVPS256
,
25627 IX86_BUILTIN_DIVPD256
,
25628 IX86_BUILTIN_DIVPS256
,
25629 IX86_BUILTIN_DPPS256
,
25630 IX86_BUILTIN_HADDPD256
,
25631 IX86_BUILTIN_HADDPS256
,
25632 IX86_BUILTIN_HSUBPD256
,
25633 IX86_BUILTIN_HSUBPS256
,
25634 IX86_BUILTIN_MAXPD256
,
25635 IX86_BUILTIN_MAXPS256
,
25636 IX86_BUILTIN_MINPD256
,
25637 IX86_BUILTIN_MINPS256
,
25638 IX86_BUILTIN_MULPD256
,
25639 IX86_BUILTIN_MULPS256
,
25640 IX86_BUILTIN_ORPD256
,
25641 IX86_BUILTIN_ORPS256
,
25642 IX86_BUILTIN_SHUFPD256
,
25643 IX86_BUILTIN_SHUFPS256
,
25644 IX86_BUILTIN_SUBPD256
,
25645 IX86_BUILTIN_SUBPS256
,
25646 IX86_BUILTIN_XORPD256
,
25647 IX86_BUILTIN_XORPS256
,
25648 IX86_BUILTIN_CMPSD
,
25649 IX86_BUILTIN_CMPSS
,
25650 IX86_BUILTIN_CMPPD
,
25651 IX86_BUILTIN_CMPPS
,
25652 IX86_BUILTIN_CMPPD256
,
25653 IX86_BUILTIN_CMPPS256
,
25654 IX86_BUILTIN_CVTDQ2PD256
,
25655 IX86_BUILTIN_CVTDQ2PS256
,
25656 IX86_BUILTIN_CVTPD2PS256
,
25657 IX86_BUILTIN_CVTPS2DQ256
,
25658 IX86_BUILTIN_CVTPS2PD256
,
25659 IX86_BUILTIN_CVTTPD2DQ256
,
25660 IX86_BUILTIN_CVTPD2DQ256
,
25661 IX86_BUILTIN_CVTTPS2DQ256
,
25662 IX86_BUILTIN_EXTRACTF128PD256
,
25663 IX86_BUILTIN_EXTRACTF128PS256
,
25664 IX86_BUILTIN_EXTRACTF128SI256
,
25665 IX86_BUILTIN_VZEROALL
,
25666 IX86_BUILTIN_VZEROUPPER
,
25667 IX86_BUILTIN_VPERMILVARPD
,
25668 IX86_BUILTIN_VPERMILVARPS
,
25669 IX86_BUILTIN_VPERMILVARPD256
,
25670 IX86_BUILTIN_VPERMILVARPS256
,
25671 IX86_BUILTIN_VPERMILPD
,
25672 IX86_BUILTIN_VPERMILPS
,
25673 IX86_BUILTIN_VPERMILPD256
,
25674 IX86_BUILTIN_VPERMILPS256
,
25675 IX86_BUILTIN_VPERMIL2PD
,
25676 IX86_BUILTIN_VPERMIL2PS
,
25677 IX86_BUILTIN_VPERMIL2PD256
,
25678 IX86_BUILTIN_VPERMIL2PS256
,
25679 IX86_BUILTIN_VPERM2F128PD256
,
25680 IX86_BUILTIN_VPERM2F128PS256
,
25681 IX86_BUILTIN_VPERM2F128SI256
,
25682 IX86_BUILTIN_VBROADCASTSS
,
25683 IX86_BUILTIN_VBROADCASTSD256
,
25684 IX86_BUILTIN_VBROADCASTSS256
,
25685 IX86_BUILTIN_VBROADCASTPD256
,
25686 IX86_BUILTIN_VBROADCASTPS256
,
25687 IX86_BUILTIN_VINSERTF128PD256
,
25688 IX86_BUILTIN_VINSERTF128PS256
,
25689 IX86_BUILTIN_VINSERTF128SI256
,
25690 IX86_BUILTIN_LOADUPD256
,
25691 IX86_BUILTIN_LOADUPS256
,
25692 IX86_BUILTIN_STOREUPD256
,
25693 IX86_BUILTIN_STOREUPS256
,
25694 IX86_BUILTIN_LDDQU256
,
25695 IX86_BUILTIN_MOVNTDQ256
,
25696 IX86_BUILTIN_MOVNTPD256
,
25697 IX86_BUILTIN_MOVNTPS256
,
25698 IX86_BUILTIN_LOADDQU256
,
25699 IX86_BUILTIN_STOREDQU256
,
25700 IX86_BUILTIN_MASKLOADPD
,
25701 IX86_BUILTIN_MASKLOADPS
,
25702 IX86_BUILTIN_MASKSTOREPD
,
25703 IX86_BUILTIN_MASKSTOREPS
,
25704 IX86_BUILTIN_MASKLOADPD256
,
25705 IX86_BUILTIN_MASKLOADPS256
,
25706 IX86_BUILTIN_MASKSTOREPD256
,
25707 IX86_BUILTIN_MASKSTOREPS256
,
25708 IX86_BUILTIN_MOVSHDUP256
,
25709 IX86_BUILTIN_MOVSLDUP256
,
25710 IX86_BUILTIN_MOVDDUP256
,
25712 IX86_BUILTIN_SQRTPD256
,
25713 IX86_BUILTIN_SQRTPS256
,
25714 IX86_BUILTIN_SQRTPS_NR256
,
25715 IX86_BUILTIN_RSQRTPS256
,
25716 IX86_BUILTIN_RSQRTPS_NR256
,
25718 IX86_BUILTIN_RCPPS256
,
25720 IX86_BUILTIN_ROUNDPD256
,
25721 IX86_BUILTIN_ROUNDPS256
,
25723 IX86_BUILTIN_FLOORPD256
,
25724 IX86_BUILTIN_CEILPD256
,
25725 IX86_BUILTIN_TRUNCPD256
,
25726 IX86_BUILTIN_RINTPD256
,
25727 IX86_BUILTIN_ROUNDPD_AZ256
,
25729 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
25730 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
25731 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
25733 IX86_BUILTIN_FLOORPS256
,
25734 IX86_BUILTIN_CEILPS256
,
25735 IX86_BUILTIN_TRUNCPS256
,
25736 IX86_BUILTIN_RINTPS256
,
25737 IX86_BUILTIN_ROUNDPS_AZ256
,
25739 IX86_BUILTIN_FLOORPS_SFIX256
,
25740 IX86_BUILTIN_CEILPS_SFIX256
,
25741 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
25743 IX86_BUILTIN_UNPCKHPD256
,
25744 IX86_BUILTIN_UNPCKLPD256
,
25745 IX86_BUILTIN_UNPCKHPS256
,
25746 IX86_BUILTIN_UNPCKLPS256
,
25748 IX86_BUILTIN_SI256_SI
,
25749 IX86_BUILTIN_PS256_PS
,
25750 IX86_BUILTIN_PD256_PD
,
25751 IX86_BUILTIN_SI_SI256
,
25752 IX86_BUILTIN_PS_PS256
,
25753 IX86_BUILTIN_PD_PD256
,
25755 IX86_BUILTIN_VTESTZPD
,
25756 IX86_BUILTIN_VTESTCPD
,
25757 IX86_BUILTIN_VTESTNZCPD
,
25758 IX86_BUILTIN_VTESTZPS
,
25759 IX86_BUILTIN_VTESTCPS
,
25760 IX86_BUILTIN_VTESTNZCPS
,
25761 IX86_BUILTIN_VTESTZPD256
,
25762 IX86_BUILTIN_VTESTCPD256
,
25763 IX86_BUILTIN_VTESTNZCPD256
,
25764 IX86_BUILTIN_VTESTZPS256
,
25765 IX86_BUILTIN_VTESTCPS256
,
25766 IX86_BUILTIN_VTESTNZCPS256
,
25767 IX86_BUILTIN_PTESTZ256
,
25768 IX86_BUILTIN_PTESTC256
,
25769 IX86_BUILTIN_PTESTNZC256
,
25771 IX86_BUILTIN_MOVMSKPD256
,
25772 IX86_BUILTIN_MOVMSKPS256
,
25775 IX86_BUILTIN_MPSADBW256
,
25776 IX86_BUILTIN_PABSB256
,
25777 IX86_BUILTIN_PABSW256
,
25778 IX86_BUILTIN_PABSD256
,
25779 IX86_BUILTIN_PACKSSDW256
,
25780 IX86_BUILTIN_PACKSSWB256
,
25781 IX86_BUILTIN_PACKUSDW256
,
25782 IX86_BUILTIN_PACKUSWB256
,
25783 IX86_BUILTIN_PADDB256
,
25784 IX86_BUILTIN_PADDW256
,
25785 IX86_BUILTIN_PADDD256
,
25786 IX86_BUILTIN_PADDQ256
,
25787 IX86_BUILTIN_PADDSB256
,
25788 IX86_BUILTIN_PADDSW256
,
25789 IX86_BUILTIN_PADDUSB256
,
25790 IX86_BUILTIN_PADDUSW256
,
25791 IX86_BUILTIN_PALIGNR256
,
25792 IX86_BUILTIN_AND256I
,
25793 IX86_BUILTIN_ANDNOT256I
,
25794 IX86_BUILTIN_PAVGB256
,
25795 IX86_BUILTIN_PAVGW256
,
25796 IX86_BUILTIN_PBLENDVB256
,
25797 IX86_BUILTIN_PBLENDVW256
,
25798 IX86_BUILTIN_PCMPEQB256
,
25799 IX86_BUILTIN_PCMPEQW256
,
25800 IX86_BUILTIN_PCMPEQD256
,
25801 IX86_BUILTIN_PCMPEQQ256
,
25802 IX86_BUILTIN_PCMPGTB256
,
25803 IX86_BUILTIN_PCMPGTW256
,
25804 IX86_BUILTIN_PCMPGTD256
,
25805 IX86_BUILTIN_PCMPGTQ256
,
25806 IX86_BUILTIN_PHADDW256
,
25807 IX86_BUILTIN_PHADDD256
,
25808 IX86_BUILTIN_PHADDSW256
,
25809 IX86_BUILTIN_PHSUBW256
,
25810 IX86_BUILTIN_PHSUBD256
,
25811 IX86_BUILTIN_PHSUBSW256
,
25812 IX86_BUILTIN_PMADDUBSW256
,
25813 IX86_BUILTIN_PMADDWD256
,
25814 IX86_BUILTIN_PMAXSB256
,
25815 IX86_BUILTIN_PMAXSW256
,
25816 IX86_BUILTIN_PMAXSD256
,
25817 IX86_BUILTIN_PMAXUB256
,
25818 IX86_BUILTIN_PMAXUW256
,
25819 IX86_BUILTIN_PMAXUD256
,
25820 IX86_BUILTIN_PMINSB256
,
25821 IX86_BUILTIN_PMINSW256
,
25822 IX86_BUILTIN_PMINSD256
,
25823 IX86_BUILTIN_PMINUB256
,
25824 IX86_BUILTIN_PMINUW256
,
25825 IX86_BUILTIN_PMINUD256
,
25826 IX86_BUILTIN_PMOVMSKB256
,
25827 IX86_BUILTIN_PMOVSXBW256
,
25828 IX86_BUILTIN_PMOVSXBD256
,
25829 IX86_BUILTIN_PMOVSXBQ256
,
25830 IX86_BUILTIN_PMOVSXWD256
,
25831 IX86_BUILTIN_PMOVSXWQ256
,
25832 IX86_BUILTIN_PMOVSXDQ256
,
25833 IX86_BUILTIN_PMOVZXBW256
,
25834 IX86_BUILTIN_PMOVZXBD256
,
25835 IX86_BUILTIN_PMOVZXBQ256
,
25836 IX86_BUILTIN_PMOVZXWD256
,
25837 IX86_BUILTIN_PMOVZXWQ256
,
25838 IX86_BUILTIN_PMOVZXDQ256
,
25839 IX86_BUILTIN_PMULDQ256
,
25840 IX86_BUILTIN_PMULHRSW256
,
25841 IX86_BUILTIN_PMULHUW256
,
25842 IX86_BUILTIN_PMULHW256
,
25843 IX86_BUILTIN_PMULLW256
,
25844 IX86_BUILTIN_PMULLD256
,
25845 IX86_BUILTIN_PMULUDQ256
,
25846 IX86_BUILTIN_POR256
,
25847 IX86_BUILTIN_PSADBW256
,
25848 IX86_BUILTIN_PSHUFB256
,
25849 IX86_BUILTIN_PSHUFD256
,
25850 IX86_BUILTIN_PSHUFHW256
,
25851 IX86_BUILTIN_PSHUFLW256
,
25852 IX86_BUILTIN_PSIGNB256
,
25853 IX86_BUILTIN_PSIGNW256
,
25854 IX86_BUILTIN_PSIGND256
,
25855 IX86_BUILTIN_PSLLDQI256
,
25856 IX86_BUILTIN_PSLLWI256
,
25857 IX86_BUILTIN_PSLLW256
,
25858 IX86_BUILTIN_PSLLDI256
,
25859 IX86_BUILTIN_PSLLD256
,
25860 IX86_BUILTIN_PSLLQI256
,
25861 IX86_BUILTIN_PSLLQ256
,
25862 IX86_BUILTIN_PSRAWI256
,
25863 IX86_BUILTIN_PSRAW256
,
25864 IX86_BUILTIN_PSRADI256
,
25865 IX86_BUILTIN_PSRAD256
,
25866 IX86_BUILTIN_PSRLDQI256
,
25867 IX86_BUILTIN_PSRLWI256
,
25868 IX86_BUILTIN_PSRLW256
,
25869 IX86_BUILTIN_PSRLDI256
,
25870 IX86_BUILTIN_PSRLD256
,
25871 IX86_BUILTIN_PSRLQI256
,
25872 IX86_BUILTIN_PSRLQ256
,
25873 IX86_BUILTIN_PSUBB256
,
25874 IX86_BUILTIN_PSUBW256
,
25875 IX86_BUILTIN_PSUBD256
,
25876 IX86_BUILTIN_PSUBQ256
,
25877 IX86_BUILTIN_PSUBSB256
,
25878 IX86_BUILTIN_PSUBSW256
,
25879 IX86_BUILTIN_PSUBUSB256
,
25880 IX86_BUILTIN_PSUBUSW256
,
25881 IX86_BUILTIN_PUNPCKHBW256
,
25882 IX86_BUILTIN_PUNPCKHWD256
,
25883 IX86_BUILTIN_PUNPCKHDQ256
,
25884 IX86_BUILTIN_PUNPCKHQDQ256
,
25885 IX86_BUILTIN_PUNPCKLBW256
,
25886 IX86_BUILTIN_PUNPCKLWD256
,
25887 IX86_BUILTIN_PUNPCKLDQ256
,
25888 IX86_BUILTIN_PUNPCKLQDQ256
,
25889 IX86_BUILTIN_PXOR256
,
25890 IX86_BUILTIN_MOVNTDQA256
,
25891 IX86_BUILTIN_VBROADCASTSS_PS
,
25892 IX86_BUILTIN_VBROADCASTSS_PS256
,
25893 IX86_BUILTIN_VBROADCASTSD_PD256
,
25894 IX86_BUILTIN_VBROADCASTSI256
,
25895 IX86_BUILTIN_PBLENDD256
,
25896 IX86_BUILTIN_PBLENDD128
,
25897 IX86_BUILTIN_PBROADCASTB256
,
25898 IX86_BUILTIN_PBROADCASTW256
,
25899 IX86_BUILTIN_PBROADCASTD256
,
25900 IX86_BUILTIN_PBROADCASTQ256
,
25901 IX86_BUILTIN_PBROADCASTB128
,
25902 IX86_BUILTIN_PBROADCASTW128
,
25903 IX86_BUILTIN_PBROADCASTD128
,
25904 IX86_BUILTIN_PBROADCASTQ128
,
25905 IX86_BUILTIN_VPERMVARSI256
,
25906 IX86_BUILTIN_VPERMDF256
,
25907 IX86_BUILTIN_VPERMVARSF256
,
25908 IX86_BUILTIN_VPERMDI256
,
25909 IX86_BUILTIN_VPERMTI256
,
25910 IX86_BUILTIN_VEXTRACT128I256
,
25911 IX86_BUILTIN_VINSERT128I256
,
25912 IX86_BUILTIN_MASKLOADD
,
25913 IX86_BUILTIN_MASKLOADQ
,
25914 IX86_BUILTIN_MASKLOADD256
,
25915 IX86_BUILTIN_MASKLOADQ256
,
25916 IX86_BUILTIN_MASKSTORED
,
25917 IX86_BUILTIN_MASKSTOREQ
,
25918 IX86_BUILTIN_MASKSTORED256
,
25919 IX86_BUILTIN_MASKSTOREQ256
,
25920 IX86_BUILTIN_PSLLVV4DI
,
25921 IX86_BUILTIN_PSLLVV2DI
,
25922 IX86_BUILTIN_PSLLVV8SI
,
25923 IX86_BUILTIN_PSLLVV4SI
,
25924 IX86_BUILTIN_PSRAVV8SI
,
25925 IX86_BUILTIN_PSRAVV4SI
,
25926 IX86_BUILTIN_PSRLVV4DI
,
25927 IX86_BUILTIN_PSRLVV2DI
,
25928 IX86_BUILTIN_PSRLVV8SI
,
25929 IX86_BUILTIN_PSRLVV4SI
,
25931 IX86_BUILTIN_GATHERSIV2DF
,
25932 IX86_BUILTIN_GATHERSIV4DF
,
25933 IX86_BUILTIN_GATHERDIV2DF
,
25934 IX86_BUILTIN_GATHERDIV4DF
,
25935 IX86_BUILTIN_GATHERSIV4SF
,
25936 IX86_BUILTIN_GATHERSIV8SF
,
25937 IX86_BUILTIN_GATHERDIV4SF
,
25938 IX86_BUILTIN_GATHERDIV8SF
,
25939 IX86_BUILTIN_GATHERSIV2DI
,
25940 IX86_BUILTIN_GATHERSIV4DI
,
25941 IX86_BUILTIN_GATHERDIV2DI
,
25942 IX86_BUILTIN_GATHERDIV4DI
,
25943 IX86_BUILTIN_GATHERSIV4SI
,
25944 IX86_BUILTIN_GATHERSIV8SI
,
25945 IX86_BUILTIN_GATHERDIV4SI
,
25946 IX86_BUILTIN_GATHERDIV8SI
,
25948 /* Alternate 4 element gather for the vectorizer where
25949 all operands are 32-byte wide. */
25950 IX86_BUILTIN_GATHERALTSIV4DF
,
25951 IX86_BUILTIN_GATHERALTDIV8SF
,
25952 IX86_BUILTIN_GATHERALTSIV4DI
,
25953 IX86_BUILTIN_GATHERALTDIV8SI
,
25955 /* TFmode support builtins. */
25957 IX86_BUILTIN_HUGE_VALQ
,
25958 IX86_BUILTIN_FABSQ
,
25959 IX86_BUILTIN_COPYSIGNQ
,
25961 /* Vectorizer support builtins. */
25962 IX86_BUILTIN_CPYSGNPS
,
25963 IX86_BUILTIN_CPYSGNPD
,
25964 IX86_BUILTIN_CPYSGNPS256
,
25965 IX86_BUILTIN_CPYSGNPD256
,
25967 /* FMA4 instructions. */
25968 IX86_BUILTIN_VFMADDSS
,
25969 IX86_BUILTIN_VFMADDSD
,
25970 IX86_BUILTIN_VFMADDPS
,
25971 IX86_BUILTIN_VFMADDPD
,
25972 IX86_BUILTIN_VFMADDPS256
,
25973 IX86_BUILTIN_VFMADDPD256
,
25974 IX86_BUILTIN_VFMADDSUBPS
,
25975 IX86_BUILTIN_VFMADDSUBPD
,
25976 IX86_BUILTIN_VFMADDSUBPS256
,
25977 IX86_BUILTIN_VFMADDSUBPD256
,
25979 /* FMA3 instructions. */
25980 IX86_BUILTIN_VFMADDSS3
,
25981 IX86_BUILTIN_VFMADDSD3
,
25983 /* XOP instructions. */
25984 IX86_BUILTIN_VPCMOV
,
25985 IX86_BUILTIN_VPCMOV_V2DI
,
25986 IX86_BUILTIN_VPCMOV_V4SI
,
25987 IX86_BUILTIN_VPCMOV_V8HI
,
25988 IX86_BUILTIN_VPCMOV_V16QI
,
25989 IX86_BUILTIN_VPCMOV_V4SF
,
25990 IX86_BUILTIN_VPCMOV_V2DF
,
25991 IX86_BUILTIN_VPCMOV256
,
25992 IX86_BUILTIN_VPCMOV_V4DI256
,
25993 IX86_BUILTIN_VPCMOV_V8SI256
,
25994 IX86_BUILTIN_VPCMOV_V16HI256
,
25995 IX86_BUILTIN_VPCMOV_V32QI256
,
25996 IX86_BUILTIN_VPCMOV_V8SF256
,
25997 IX86_BUILTIN_VPCMOV_V4DF256
,
25999 IX86_BUILTIN_VPPERM
,
26001 IX86_BUILTIN_VPMACSSWW
,
26002 IX86_BUILTIN_VPMACSWW
,
26003 IX86_BUILTIN_VPMACSSWD
,
26004 IX86_BUILTIN_VPMACSWD
,
26005 IX86_BUILTIN_VPMACSSDD
,
26006 IX86_BUILTIN_VPMACSDD
,
26007 IX86_BUILTIN_VPMACSSDQL
,
26008 IX86_BUILTIN_VPMACSSDQH
,
26009 IX86_BUILTIN_VPMACSDQL
,
26010 IX86_BUILTIN_VPMACSDQH
,
26011 IX86_BUILTIN_VPMADCSSWD
,
26012 IX86_BUILTIN_VPMADCSWD
,
26014 IX86_BUILTIN_VPHADDBW
,
26015 IX86_BUILTIN_VPHADDBD
,
26016 IX86_BUILTIN_VPHADDBQ
,
26017 IX86_BUILTIN_VPHADDWD
,
26018 IX86_BUILTIN_VPHADDWQ
,
26019 IX86_BUILTIN_VPHADDDQ
,
26020 IX86_BUILTIN_VPHADDUBW
,
26021 IX86_BUILTIN_VPHADDUBD
,
26022 IX86_BUILTIN_VPHADDUBQ
,
26023 IX86_BUILTIN_VPHADDUWD
,
26024 IX86_BUILTIN_VPHADDUWQ
,
26025 IX86_BUILTIN_VPHADDUDQ
,
26026 IX86_BUILTIN_VPHSUBBW
,
26027 IX86_BUILTIN_VPHSUBWD
,
26028 IX86_BUILTIN_VPHSUBDQ
,
26030 IX86_BUILTIN_VPROTB
,
26031 IX86_BUILTIN_VPROTW
,
26032 IX86_BUILTIN_VPROTD
,
26033 IX86_BUILTIN_VPROTQ
,
26034 IX86_BUILTIN_VPROTB_IMM
,
26035 IX86_BUILTIN_VPROTW_IMM
,
26036 IX86_BUILTIN_VPROTD_IMM
,
26037 IX86_BUILTIN_VPROTQ_IMM
,
26039 IX86_BUILTIN_VPSHLB
,
26040 IX86_BUILTIN_VPSHLW
,
26041 IX86_BUILTIN_VPSHLD
,
26042 IX86_BUILTIN_VPSHLQ
,
26043 IX86_BUILTIN_VPSHAB
,
26044 IX86_BUILTIN_VPSHAW
,
26045 IX86_BUILTIN_VPSHAD
,
26046 IX86_BUILTIN_VPSHAQ
,
26048 IX86_BUILTIN_VFRCZSS
,
26049 IX86_BUILTIN_VFRCZSD
,
26050 IX86_BUILTIN_VFRCZPS
,
26051 IX86_BUILTIN_VFRCZPD
,
26052 IX86_BUILTIN_VFRCZPS256
,
26053 IX86_BUILTIN_VFRCZPD256
,
26055 IX86_BUILTIN_VPCOMEQUB
,
26056 IX86_BUILTIN_VPCOMNEUB
,
26057 IX86_BUILTIN_VPCOMLTUB
,
26058 IX86_BUILTIN_VPCOMLEUB
,
26059 IX86_BUILTIN_VPCOMGTUB
,
26060 IX86_BUILTIN_VPCOMGEUB
,
26061 IX86_BUILTIN_VPCOMFALSEUB
,
26062 IX86_BUILTIN_VPCOMTRUEUB
,
26064 IX86_BUILTIN_VPCOMEQUW
,
26065 IX86_BUILTIN_VPCOMNEUW
,
26066 IX86_BUILTIN_VPCOMLTUW
,
26067 IX86_BUILTIN_VPCOMLEUW
,
26068 IX86_BUILTIN_VPCOMGTUW
,
26069 IX86_BUILTIN_VPCOMGEUW
,
26070 IX86_BUILTIN_VPCOMFALSEUW
,
26071 IX86_BUILTIN_VPCOMTRUEUW
,
26073 IX86_BUILTIN_VPCOMEQUD
,
26074 IX86_BUILTIN_VPCOMNEUD
,
26075 IX86_BUILTIN_VPCOMLTUD
,
26076 IX86_BUILTIN_VPCOMLEUD
,
26077 IX86_BUILTIN_VPCOMGTUD
,
26078 IX86_BUILTIN_VPCOMGEUD
,
26079 IX86_BUILTIN_VPCOMFALSEUD
,
26080 IX86_BUILTIN_VPCOMTRUEUD
,
26082 IX86_BUILTIN_VPCOMEQUQ
,
26083 IX86_BUILTIN_VPCOMNEUQ
,
26084 IX86_BUILTIN_VPCOMLTUQ
,
26085 IX86_BUILTIN_VPCOMLEUQ
,
26086 IX86_BUILTIN_VPCOMGTUQ
,
26087 IX86_BUILTIN_VPCOMGEUQ
,
26088 IX86_BUILTIN_VPCOMFALSEUQ
,
26089 IX86_BUILTIN_VPCOMTRUEUQ
,
26091 IX86_BUILTIN_VPCOMEQB
,
26092 IX86_BUILTIN_VPCOMNEB
,
26093 IX86_BUILTIN_VPCOMLTB
,
26094 IX86_BUILTIN_VPCOMLEB
,
26095 IX86_BUILTIN_VPCOMGTB
,
26096 IX86_BUILTIN_VPCOMGEB
,
26097 IX86_BUILTIN_VPCOMFALSEB
,
26098 IX86_BUILTIN_VPCOMTRUEB
,
26100 IX86_BUILTIN_VPCOMEQW
,
26101 IX86_BUILTIN_VPCOMNEW
,
26102 IX86_BUILTIN_VPCOMLTW
,
26103 IX86_BUILTIN_VPCOMLEW
,
26104 IX86_BUILTIN_VPCOMGTW
,
26105 IX86_BUILTIN_VPCOMGEW
,
26106 IX86_BUILTIN_VPCOMFALSEW
,
26107 IX86_BUILTIN_VPCOMTRUEW
,
26109 IX86_BUILTIN_VPCOMEQD
,
26110 IX86_BUILTIN_VPCOMNED
,
26111 IX86_BUILTIN_VPCOMLTD
,
26112 IX86_BUILTIN_VPCOMLED
,
26113 IX86_BUILTIN_VPCOMGTD
,
26114 IX86_BUILTIN_VPCOMGED
,
26115 IX86_BUILTIN_VPCOMFALSED
,
26116 IX86_BUILTIN_VPCOMTRUED
,
26118 IX86_BUILTIN_VPCOMEQQ
,
26119 IX86_BUILTIN_VPCOMNEQ
,
26120 IX86_BUILTIN_VPCOMLTQ
,
26121 IX86_BUILTIN_VPCOMLEQ
,
26122 IX86_BUILTIN_VPCOMGTQ
,
26123 IX86_BUILTIN_VPCOMGEQ
,
26124 IX86_BUILTIN_VPCOMFALSEQ
,
26125 IX86_BUILTIN_VPCOMTRUEQ
,
26127 /* LWP instructions. */
26128 IX86_BUILTIN_LLWPCB
,
26129 IX86_BUILTIN_SLWPCB
,
26130 IX86_BUILTIN_LWPVAL32
,
26131 IX86_BUILTIN_LWPVAL64
,
26132 IX86_BUILTIN_LWPINS32
,
26133 IX86_BUILTIN_LWPINS64
,
26138 IX86_BUILTIN_XBEGIN
,
26140 IX86_BUILTIN_XABORT
,
26141 IX86_BUILTIN_XTEST
,
26143 /* BMI instructions. */
26144 IX86_BUILTIN_BEXTR32
,
26145 IX86_BUILTIN_BEXTR64
,
26148 /* TBM instructions. */
26149 IX86_BUILTIN_BEXTRI32
,
26150 IX86_BUILTIN_BEXTRI64
,
26152 /* BMI2 instructions. */
26153 IX86_BUILTIN_BZHI32
,
26154 IX86_BUILTIN_BZHI64
,
26155 IX86_BUILTIN_PDEP32
,
26156 IX86_BUILTIN_PDEP64
,
26157 IX86_BUILTIN_PEXT32
,
26158 IX86_BUILTIN_PEXT64
,
26160 /* ADX instructions. */
26161 IX86_BUILTIN_ADDCARRYX32
,
26162 IX86_BUILTIN_ADDCARRYX64
,
26164 /* FSGSBASE instructions. */
26165 IX86_BUILTIN_RDFSBASE32
,
26166 IX86_BUILTIN_RDFSBASE64
,
26167 IX86_BUILTIN_RDGSBASE32
,
26168 IX86_BUILTIN_RDGSBASE64
,
26169 IX86_BUILTIN_WRFSBASE32
,
26170 IX86_BUILTIN_WRFSBASE64
,
26171 IX86_BUILTIN_WRGSBASE32
,
26172 IX86_BUILTIN_WRGSBASE64
,
26174 /* RDRND instructions. */
26175 IX86_BUILTIN_RDRAND16_STEP
,
26176 IX86_BUILTIN_RDRAND32_STEP
,
26177 IX86_BUILTIN_RDRAND64_STEP
,
26179 /* RDSEED instructions. */
26180 IX86_BUILTIN_RDSEED16_STEP
,
26181 IX86_BUILTIN_RDSEED32_STEP
,
26182 IX86_BUILTIN_RDSEED64_STEP
,
26184 /* F16C instructions. */
26185 IX86_BUILTIN_CVTPH2PS
,
26186 IX86_BUILTIN_CVTPH2PS256
,
26187 IX86_BUILTIN_CVTPS2PH
,
26188 IX86_BUILTIN_CVTPS2PH256
,
26190 /* CFString built-in for darwin */
26191 IX86_BUILTIN_CFSTRING
,
26193 /* Builtins to get CPU type and supported features. */
26194 IX86_BUILTIN_CPU_INIT
,
26195 IX86_BUILTIN_CPU_IS
,
26196 IX86_BUILTIN_CPU_SUPPORTS
,
26201 /* Table for the ix86 builtin decls.  Indexed by (int) enum ix86_builtins;
   an entry is NULL_TREE until the builtin has actually been registered
   (see def_builtin and ix86_add_new_builtins below).  GTY(()) makes the
   trees visible to the garbage collector.  */
26202 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26204 /* Table of all of the builtin functions that are possible with different ISA's
26205 but are waiting to be built until a function is declared to use that
   ISA (deferred registration; see def_builtin and ix86_add_new_builtins).  */
26207 struct builtin_isa
{
26208 const char *name
; /* function name */
26209 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26210 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26211 bool const_p
; /* true if the declaration is constant */
26212 bool set_and_not_built_p
; /* true while the builtin is recorded here but its decl has not
     been built yet; cleared by def_builtin / ix86_add_new_builtins
     once the decl exists.  */
/* Per-builtin deferred-registration records, parallel to ix86_builtins
   and likewise indexed by (int) enum ix86_builtins.  */
26215 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26218 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26219 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26220 function decl in the ix86_builtins array. Returns the function decl or
26221 NULL_TREE, if the builtin was not added.
26223 If the front end has a special hook for builtin functions, delay adding
26224 builtin functions that aren't in the current ISA until the ISA is changed
26225 with function specific optimization. Doing so, can save about 300K for the
26226 default compiler. When the builtin is expanded, check at that time whether
26229 If the front end doesn't have a special hook, record all builtins, even if
26230 it isn't an instruction set in the current ISA in case the user uses
26231 function specific options for a different ISA, so that we don't get scope
26232 errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): this extract is missing several physical lines of the
   original (braces, the `mask == 0' test at 26246, lines 26265-26268 and
   the final return) -- verify against the upstream i386.c before use.  */
26235 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26236 enum ix86_builtin_func_type tcode
,
26237 enum ix86_builtins code
)
26239 tree decl
= NULL_TREE
;
/* Skip builtins that require 64-bit support on non-64-bit targets.  */
26241 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
/* Remember which isa_flags this builtin belongs to.  */
26243 ix86_builtins_isa
[(int) code
].isa
= mask
;
/* The 64-bit bit only gated eligibility above; drop it before the
   ISA-availability test below.  */
26245 mask
&= ~OPTION_MASK_ISA_64BIT
;
/* Register immediately when the builtin's ISA is enabled now, or when
   the front end registers builtins at extended scope anyway.  */
26247 || (mask
& ix86_isa_flags
) != 0
26248 || (lang_hooks
.builtin_function
26249 == lang_hooks
.builtin_function_ext_scope
))
26252 tree type
= ix86_get_builtin_func_type (tcode
);
26253 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26255 ix86_builtins
[(int) code
] = decl
;
26256 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
/* Otherwise defer: record everything needed so ix86_add_new_builtins
   can build the decl later when the ISA becomes enabled.  */
26260 ix86_builtins
[(int) code
] = NULL_TREE
;
26261 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26262 ix86_builtins_isa
[(int) code
].name
= name
;
26263 ix86_builtins_isa
[(int) code
].const_p
= false;
26264 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26271 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the `if (decl) ... else' lines of the original (26278,
   26280) are missing from this extract -- presumably TREE_READONLY is set
   only when def_builtin built a decl, and const_p is recorded for the
   deferred case; confirm against upstream i386.c.  */
26274 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26275 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
/* Delegate the actual registration (or deferral) to def_builtin.  */
26277 tree decl
= def_builtin (mask
, name
, tcode
, code
);
/* Mark the built decl "const" so calls can be CSE'd/hoisted.  */
26279 TREE_READONLY (decl
) = 1;
/* Record const-ness so a deferred build is also marked const
   (see ix86_add_new_builtins).  */
26281 ix86_builtins_isa
[(int) code
].const_p
= true;
26286 /* Add any new builtin functions for a given ISA that may not have been
26287 declared. This saves a bit of space compared to adding all of the
26288 declarations to the tree, even if we didn't use them. */
26291 ix86_add_new_builtins (HOST_WIDE_INT isa
)
/* Walk every builtin slot looking for deferred registrations whose
   ISA is now (partly) enabled.  */
26295 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26297 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26298 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26302 /* Don't define the builtin again. */
26303 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
/* Build the decl from the recorded name and function type, at
   extended scope so it is visible mid-function.  */
26305 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26306 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26307 type
, i
, BUILT_IN_MD
, NULL
,
26310 ix86_builtins
[i
] = decl
;
/* Honor the const-ness def_builtin_const recorded earlier.  */
26311 if (ix86_builtins_isa
[i
].const_p
)
26312 TREE_READONLY (decl
) = 1;
26317 /* Bits for builtin_description.flag. */
26319 /* Set when we don't support the comparison natively, and should
26320 swap_comparison in order to support it. */
26321 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin-description tables below (bdesc_comi,
   bdesc_pcmpestr, ...): maps an ISA mask and insn pattern to a
   __builtin_ia32_* name and its IX86_BUILTIN_* code.  */
26323 struct builtin_description
26325 const HOST_WIDE_INT mask
; /* OPTION_MASK_ISA_* bits the builtin requires.  */
26326 const enum insn_code icode
; /* CODE_FOR_* insn pattern used to expand it.  */
26327 const char *const name
; /* "__builtin_ia32_..." source-level name.  */
26328 const enum ix86_builtins code
; /* IX86_BUILTIN_* identifier.  */
26329 const enum rtx_code comparison
; /* Comparison rtx code (UNEQ, GT, ...), or UNKNOWN.  */
/* NOTE(review): the `flag' field referenced by the comment at 26317 (and
   initialized by the last value of each table row) is missing from this
   extract -- confirm against upstream i386.c.  */
/* Descriptions of the SSE/SSE2 (u)comis{s,d} comparison builtins.
   Each row: ISA mask, insn pattern, builtin name, builtin code,
   comparison rtx code, flag.  */
26333 static const struct builtin_description bdesc_comi
[] =
26335 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26336 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26337 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26338 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26339 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26340 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26341 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26342 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26343 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26344 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26345 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26346 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26347 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26348 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26349 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26350 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26351 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26352 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26353 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26354 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26355 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26356 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26357 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26358 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
/* Descriptions of the SSE4.2 pcmpestr* builtins (explicit-length string
   compare).  The last field selects which CC flag the builtin reads
   (CCAmode = CF and ZF clear, CCCmode = CF, CCOmode = OF, CCSmode = SF,
   CCZmode = ZF), or 0 for the index/mask-producing variants.  */
26361 static const struct builtin_description bdesc_pcmpestr
[] =
26364 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26365 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26366 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26367 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26368 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26369 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26370 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26373 static const struct builtin_description bdesc_pcmpistr
[] =
26376 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26377 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26378 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26379 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26380 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26381 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26382 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26385 /* Special builtins with variable number of arguments. */
26386 static const struct builtin_description bdesc_special_args
[] =
26388 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtsc
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26389 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtscp
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26390 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26393 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26396 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26399 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26400 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26401 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26403 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26404 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26405 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26406 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26408 /* SSE or 3DNow!A */
26409 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26410 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26413 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26420 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26424 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26428 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26431 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26434 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26435 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26438 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26439 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26441 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26442 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26443 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26444 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26445 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26447 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26448 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26449 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26450 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26451 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26452 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26453 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26455 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26456 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26457 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26459 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26460 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26461 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26462 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26463 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26464 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26465 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26466 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26469 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26470 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26471 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26472 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26473 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26474 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26475 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26476 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26477 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26479 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26480 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26481 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26482 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26483 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26484 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26487 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26488 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26489 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26490 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26491 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26492 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26493 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26494 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26497 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26498 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26499 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26502 /* Builtins with variable number of arguments. */
26503 static const struct builtin_description bdesc_args
[] =
26505 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26506 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26507 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26508 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26509 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26510 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26511 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26514 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26515 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26516 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26517 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26518 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26519 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26521 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26522 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26523 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26524 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26525 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26526 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26527 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26528 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26530 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26531 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26533 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26534 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26535 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26536 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26538 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26539 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26540 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26541 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26542 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26543 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26545 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26546 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26547 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26548 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26549 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26550 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26552 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26553 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26554 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26556 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26558 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26559 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26560 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26561 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26562 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26563 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26565 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26566 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26567 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26568 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26569 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26570 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26572 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26573 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26574 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26575 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26578 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26579 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26580 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26581 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26583 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26584 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26585 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26586 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26587 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26588 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26589 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26590 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26591 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26592 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26593 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26594 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26595 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26596 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26597 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26600 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26601 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26602 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26603 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26604 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26605 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26608 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26609 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26610 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26611 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26612 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26613 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26614 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26615 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26616 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26617 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26618 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26619 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26621 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26623 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26624 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26625 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26626 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26627 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26628 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26629 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26630 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26632 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26633 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26634 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26635 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26636 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26637 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26638 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26639 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26640 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26641 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26642 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26643 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26644 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26645 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26646 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26647 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26648 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26649 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26650 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26651 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26652 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26653 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26655 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26656 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26657 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26658 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26660 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26661 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26662 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26663 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26665 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26667 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26668 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26669 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26670 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26671 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26673 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26674 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26675 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
26677 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26679 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26680 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26681 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26683 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26684 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26686 /* SSE MMX or 3Dnow!A */
26687 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26688 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26689 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26691 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26692 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26693 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26694 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26696 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26697 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26699 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
26702 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26704 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26705 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26706 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26707 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26708 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26710 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26711 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26712 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26713 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26714 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26716 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26718 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26719 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26720 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26721 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26723 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26724 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26725 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26727 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26728 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26729 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26730 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26731 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26732 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26733 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26734 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26736 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26737 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26738 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26739 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26740 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26741 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26742 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26743 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26744 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26745 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26746 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26747 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26748 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26749 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26750 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26751 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26755 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26757 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26758 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26762 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26763 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26764 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26765 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26767 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26769 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26770 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26771 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26773 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26775 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26778 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26782 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26786 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26787 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26789 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26790 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26791 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26793 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26794 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
26796 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26797 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26798 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26799 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26801 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26802 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26804 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26805 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26807 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26808 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26819 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26821 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26826 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26829 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
26832 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
26833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26835 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
26837 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
26838 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
26839 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
26840 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
26842 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26843 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26844 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26845 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26846 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26847 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26848 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26850 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26851 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26852 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26853 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26854 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26856 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26858 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26859 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26860 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26861 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26863 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
26864 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26865 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26867 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
26869 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26872 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
26873 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
26876 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26877 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26879 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26880 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26881 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26882 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26883 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26884 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26887 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
26888 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
26889 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26890 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
26891 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
26892 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26894 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26895 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26896 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26897 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26898 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26899 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26900 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26901 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26902 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26903 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26904 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26905 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26906 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
26907 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
26908 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26909 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26910 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26911 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26912 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26913 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26914 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26915 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26916 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26917 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26920 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
26921 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
26924 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26925 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26926 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
26927 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
26928 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26929 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26930 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26931 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
26932 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
26933 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
26935 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26936 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26937 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26938 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26939 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26940 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26941 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26942 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26943 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26944 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26945 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26946 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26947 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26949 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26950 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26951 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26952 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26953 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26954 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26955 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26956 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26957 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26958 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26959 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26960 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26963 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
26964 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
26965 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26966 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26968 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26969 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
26970 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
26971 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26973 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
26974 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
26976 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26977 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26979 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26980 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
26981 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
26982 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26984 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
26985 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
26987 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26988 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26990 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26991 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26992 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26995 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26996 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
26997 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
26998 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26999 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27002 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27003 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27004 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27005 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27008 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27009 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27011 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27012 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27013 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27014 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27020 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27021 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27022 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27023 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27024 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27025 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27026 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27027 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27028 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27029 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27030 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27031 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27032 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27033 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27034 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27035 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27036 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27037 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27038 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27039 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27040 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27041 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27042 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27043 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27044 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27045 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27047 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27048 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27049 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27050 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27052 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27053 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27054 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27055 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27056 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27057 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27058 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27059 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27060 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27061 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27062 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27063 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27064 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27065 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27066 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27067 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27068 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27069 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27070 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27071 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27072 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27073 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27074 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27075 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27076 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27077 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27078 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27079 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27080 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27081 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27082 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27083 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27084 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27085 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27087 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27088 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27089 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27091 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27092 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27093 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27094 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27095 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27097 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27099 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27100 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27102 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27103 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27104 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27105 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27107 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27108 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27110 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27111 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27113 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27114 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27115 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27116 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27118 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27119 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27121 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27122 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27124 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27125 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27126 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27127 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27129 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27130 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27131 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27132 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27133 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27134 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27136 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27137 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27138 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27139 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27140 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27141 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27142 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27143 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27144 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27145 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27146 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27147 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27148 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27149 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27150 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27152 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27153 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27155 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27156 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27158 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27161 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27162 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27163 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27164 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27165 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27166 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27167 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27168 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27169 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27170 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27171 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27172 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27173 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27174 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27175 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27176 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27177 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27178 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27179 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27180 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27181 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27182 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27183 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27184 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27185 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27186 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27187 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27188 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27189 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27190 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27191 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27192 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27193 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27194 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27195 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27196 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27197 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27198 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27199 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27200 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27201 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27202 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27203 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27204 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27205 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27206 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27207 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27208 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27209 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27210 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27211 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27212 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27213 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27214 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27215 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27216 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27217 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27218 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27219 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27220 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27221 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27222 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27223 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27224 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27225 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27226 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27227 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27228 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27229 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27230 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27231 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27232 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27233 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27234 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27235 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27236 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27237 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27238 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27239 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27240 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27241 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27242 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27243 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27244 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27245 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27246 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27247 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27248 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27249 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27250 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27251 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27252 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27253 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27254 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27255 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27256 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27257 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27258 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27259 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27260 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27261 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27262 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27263 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27264 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27265 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27266 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27267 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27268 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27269 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27270 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27271 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27272 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27273 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27274 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27275 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27276 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27277 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27278 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27279 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27280 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27281 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27282 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27283 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27284 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27285 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27286 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27287 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27288 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27289 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27290 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27291 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27292 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27293 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27294 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27295 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27296 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27297 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27298 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27299 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27300 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27301 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27302 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27303 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27304 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27305 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27306 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27308 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27311 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27312 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27313 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27316 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27317 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27320 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27321 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27322 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27323 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27326 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27327 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27328 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27329 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27330 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27331 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Shorthand aliases mapping the MULTI_ARG_* operand-shape names used in
   the multi-argument builtin table below onto the canonical ix86
   function-type enumerators (V*_FTYPE_*).  The name encodes the argument
   count and element layout; trailing suffixes mark variants taking an
   immediate (_IMM), a comparison code (_CMP), or a trueness test (_TF).
   NOTE(review): the "2" in e.g. MULTI_ARG_3_SF2 appears to denote the
   256-bit (doubled-width) vector form — confirm against the FTYPE
   enumerator definitions.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27388 static const struct builtin_description bdesc_multi_arg
[] =
27390 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27391 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27392 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27393 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27394 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27395 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27397 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27398 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27399 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27400 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27401 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27402 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27404 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27405 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27406 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27407 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27408 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27409 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27410 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27411 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27412 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27413 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27414 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27415 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27417 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27418 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27419 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27420 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27421 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27422 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27423 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27424 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27425 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27426 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27427 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27428 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27430 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27431 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27432 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27433 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27434 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27435 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27436 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27438 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27439 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27440 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27441 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27442 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27443 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27444 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27446 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27448 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27449 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27450 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27451 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27452 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27453 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27454 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27455 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27456 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27457 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27458 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27459 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27461 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27462 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27463 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27464 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27465 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27466 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27467 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27468 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27469 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27470 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27471 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27472 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27473 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27474 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27475 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27476 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27478 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27479 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27480 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27481 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27482 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27483 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27485 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27486 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27487 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27488 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27489 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27490 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27491 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27492 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27493 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27494 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27495 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27496 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27497 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27498 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27499 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27501 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27502 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27503 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27504 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27505 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27506 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27507 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27509 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27510 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27511 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27512 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27513 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27514 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27515 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27517 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27518 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27519 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27520 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27521 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27522 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27523 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27525 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27526 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27527 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27528 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27529 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27530 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27531 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27533 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27534 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27535 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27536 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27537 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27538 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27539 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27541 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27542 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27543 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27544 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
27545 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
27546 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
27547 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
27549 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27550 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27551 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27552 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
27553 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
27554 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
27555 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
27557 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27558 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27559 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27560 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
27561 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
27562 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
27563 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
27565 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27566 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27567 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27568 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27569 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27570 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27571 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27572 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27574 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27575 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27576 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27577 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27578 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27579 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27580 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27581 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27583 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
27584 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
27585 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
27586 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
27590 /* TM vector builtins. */
27592 /* Reuse the existing x86-specific `struct builtin_description' cause
27593 we're lazy. Add casts to make them fit. */
27594 static const struct builtin_description bdesc_tm
[] =
27596 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27597 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27598 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27599 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27600 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27601 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27602 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27604 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27605 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27606 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27607 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27608 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27609 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27610 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27612 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27613 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27614 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27615 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27616 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27617 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27618 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27620 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27621 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27622 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27625 /* TM callbacks. */
27627 /* Return the builtin decl needed to load a vector of TYPE. */
27630 ix86_builtin_tm_load (tree type
)
27632 if (TREE_CODE (type
) == VECTOR_TYPE
)
27634 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27637 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
27639 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
27641 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
27647 /* Return the builtin decl needed to store a vector of TYPE. */
27650 ix86_builtin_tm_store (tree type
)
27652 if (TREE_CODE (type
) == VECTOR_TYPE
)
27654 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27657 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
27659 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
27661 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
27667 /* Initialize the transactional memory vector load/store builtins. */
27670 ix86_init_tm_builtins (void)
27672 enum ix86_builtin_func_type ftype
;
27673 const struct builtin_description
*d
;
27676 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
27677 tree attrs_log
, attrs_type_log
;
27682 /* If there are no builtins defined, we must be compiling in a
27683 language without trans-mem support. */
27684 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
27687 /* Use whatever attributes a normal TM load has. */
27688 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
27689 attrs_load
= DECL_ATTRIBUTES (decl
);
27690 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27691 /* Use whatever attributes a normal TM store has. */
27692 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
27693 attrs_store
= DECL_ATTRIBUTES (decl
);
27694 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27695 /* Use whatever attributes a normal TM log has. */
27696 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
27697 attrs_log
= DECL_ATTRIBUTES (decl
);
27698 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27700 for (i
= 0, d
= bdesc_tm
;
27701 i
< ARRAY_SIZE (bdesc_tm
);
27704 if ((d
->mask
& ix86_isa_flags
) != 0
27705 || (lang_hooks
.builtin_function
27706 == lang_hooks
.builtin_function_ext_scope
))
27708 tree type
, attrs
, attrs_type
;
27709 enum built_in_function code
= (enum built_in_function
) d
->code
;
27711 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27712 type
= ix86_get_builtin_func_type (ftype
);
27714 if (BUILTIN_TM_LOAD_P (code
))
27716 attrs
= attrs_load
;
27717 attrs_type
= attrs_type_load
;
27719 else if (BUILTIN_TM_STORE_P (code
))
27721 attrs
= attrs_store
;
27722 attrs_type
= attrs_type_store
;
27727 attrs_type
= attrs_type_log
;
27729 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
27730 /* The builtin without the prefix for
27731 calling it directly. */
27732 d
->name
+ strlen ("__builtin_"),
27734 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
27735 set the TYPE_ATTRIBUTES. */
27736 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
27738 set_builtin_decl (code
, decl
, false);
27743 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
27744 in the current target ISA to allow the user to compile particular modules
27745 with different target specific options that differ from the command line
27748 ix86_init_mmx_sse_builtins (void)
27750 const struct builtin_description
* d
;
27751 enum ix86_builtin_func_type ftype
;
27754 /* Add all special builtins with variable number of operands. */
27755 for (i
= 0, d
= bdesc_special_args
;
27756 i
< ARRAY_SIZE (bdesc_special_args
);
27762 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27763 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
27766 /* Add all builtins with variable number of operands. */
27767 for (i
= 0, d
= bdesc_args
;
27768 i
< ARRAY_SIZE (bdesc_args
);
27774 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27775 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27778 /* pcmpestr[im] insns. */
27779 for (i
= 0, d
= bdesc_pcmpestr
;
27780 i
< ARRAY_SIZE (bdesc_pcmpestr
);
27783 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
27784 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
27786 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
27787 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27790 /* pcmpistr[im] insns. */
27791 for (i
= 0, d
= bdesc_pcmpistr
;
27792 i
< ARRAY_SIZE (bdesc_pcmpistr
);
27795 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
27796 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
27798 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
27799 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27802 /* comi/ucomi insns. */
27803 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
27805 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
27806 ftype
= INT_FTYPE_V2DF_V2DF
;
27808 ftype
= INT_FTYPE_V4SF_V4SF
;
27809 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27813 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
27814 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
27815 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
27816 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
27818 /* SSE or 3DNow!A */
27819 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27820 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
27821 IX86_BUILTIN_MASKMOVQ
);
27824 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
27825 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
27827 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
27828 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
27829 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
27830 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
27833 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
27834 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
27835 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
27836 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
27839 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
27840 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
27841 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
27842 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
27843 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
27844 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
27845 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
27846 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
27847 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
27848 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
27849 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
27850 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
27853 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
27854 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
27857 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
27858 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
27859 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
27860 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
27861 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
27862 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
27863 IX86_BUILTIN_RDRAND64_STEP
);
27866 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
27867 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
27868 IX86_BUILTIN_GATHERSIV2DF
);
27870 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
27871 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
27872 IX86_BUILTIN_GATHERSIV4DF
);
27874 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
27875 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
27876 IX86_BUILTIN_GATHERDIV2DF
);
27878 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
27879 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
27880 IX86_BUILTIN_GATHERDIV4DF
);
27882 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
27883 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
27884 IX86_BUILTIN_GATHERSIV4SF
);
27886 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
27887 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
27888 IX86_BUILTIN_GATHERSIV8SF
);
27890 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
27891 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
27892 IX86_BUILTIN_GATHERDIV4SF
);
27894 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
27895 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
27896 IX86_BUILTIN_GATHERDIV8SF
);
27898 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
27899 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
27900 IX86_BUILTIN_GATHERSIV2DI
);
27902 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
27903 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
27904 IX86_BUILTIN_GATHERSIV4DI
);
27906 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
27907 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
27908 IX86_BUILTIN_GATHERDIV2DI
);
27910 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
27911 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
27912 IX86_BUILTIN_GATHERDIV4DI
);
27914 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
27915 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
27916 IX86_BUILTIN_GATHERSIV4SI
);
27918 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
27919 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
27920 IX86_BUILTIN_GATHERSIV8SI
);
27922 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
27923 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
27924 IX86_BUILTIN_GATHERDIV4SI
);
27926 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
27927 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
27928 IX86_BUILTIN_GATHERDIV8SI
);
27930 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
27931 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
27932 IX86_BUILTIN_GATHERALTSIV4DF
);
27934 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
27935 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
27936 IX86_BUILTIN_GATHERALTDIV8SF
);
27938 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
27939 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
27940 IX86_BUILTIN_GATHERALTSIV4DI
);
27942 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
27943 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
27944 IX86_BUILTIN_GATHERALTDIV8SI
);
27947 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
27948 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
27950 /* MMX access to the vec_init patterns. */
27951 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
27952 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
27954 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
27955 V4HI_FTYPE_HI_HI_HI_HI
,
27956 IX86_BUILTIN_VEC_INIT_V4HI
);
27958 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
27959 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
27960 IX86_BUILTIN_VEC_INIT_V8QI
);
27962 /* Access to the vec_extract patterns. */
27963 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
27964 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
27965 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
27966 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
27967 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
27968 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
27969 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
27970 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
27971 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
27972 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
27974 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27975 "__builtin_ia32_vec_ext_v4hi",
27976 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
27978 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
27979 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
27981 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
27982 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
27984 /* Access to the vec_set patterns. */
27985 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
27986 "__builtin_ia32_vec_set_v2di",
27987 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
27989 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
27990 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
27992 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
27993 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
27995 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
27996 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
27998 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27999 "__builtin_ia32_vec_set_v4hi",
28000 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28002 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28003 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28006 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28007 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28008 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28009 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28010 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28011 "__builtin_ia32_rdseed_di_step",
28012 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28015 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28016 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28017 def_builtin (OPTION_MASK_ISA_64BIT
,
28018 "__builtin_ia32_addcarryx_u64",
28019 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28020 IX86_BUILTIN_ADDCARRYX64
);
28022 /* Add FMA4 multi-arg argument instructions */
28023 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28028 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28029 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28033 /* This builds the processor_model struct type defined in
28034 libgcc/config/i386/cpuinfo.c */
28037 build_processor_model_struct (void)
28039 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
28041 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
28043 tree type
= make_node (RECORD_TYPE
);
28045 /* The first 3 fields are unsigned int. */
28046 for (i
= 0; i
< 3; ++i
)
28048 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
28049 get_identifier (field_name
[i
]), unsigned_type_node
);
28050 if (field_chain
!= NULL_TREE
)
28051 DECL_CHAIN (field
) = field_chain
;
28052 field_chain
= field
;
28055 /* The last field is an array of unsigned integers of size one. */
28056 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
28057 get_identifier (field_name
[3]),
28058 build_array_type (unsigned_type_node
,
28059 build_index_type (size_one_node
)));
28060 if (field_chain
!= NULL_TREE
)
28061 DECL_CHAIN (field
) = field_chain
;
28062 field_chain
= field
;
28064 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
28068 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
28071 make_var_decl (tree type
, const char *name
)
28075 new_decl
= build_decl (UNKNOWN_LOCATION
,
28077 get_identifier(name
),
28080 DECL_EXTERNAL (new_decl
) = 1;
28081 TREE_STATIC (new_decl
) = 1;
28082 TREE_PUBLIC (new_decl
) = 1;
28083 DECL_INITIAL (new_decl
) = 0;
28084 DECL_ARTIFICIAL (new_decl
) = 0;
28085 DECL_PRESERVE_P (new_decl
) = 1;
28087 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
28088 assemble_variable (new_decl
, 0, 0, 0);
28093 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
28094 into an integer defined in libgcc/config/i386/cpuinfo.c */
28097 fold_builtin_cpu (tree fndecl
, tree
*args
)
28100 enum ix86_builtins fn_code
= (enum ix86_builtins
)
28101 DECL_FUNCTION_CODE (fndecl
);
28102 tree param_string_cst
= NULL
;
28104 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
28105 enum processor_features
28121 /* These are the values for vendor types and cpu types and subtypes
28122 in cpuinfo.c. Cpu types and subtypes should be subtracted by
28123 the corresponding start value. */
28124 enum processor_model
28134 M_CPU_SUBTYPE_START
,
28135 M_INTEL_COREI7_NEHALEM
,
28136 M_INTEL_COREI7_WESTMERE
,
28137 M_INTEL_COREI7_SANDYBRIDGE
,
28138 M_AMDFAM10H_BARCELONA
,
28139 M_AMDFAM10H_SHANGHAI
,
28140 M_AMDFAM10H_ISTANBUL
,
28141 M_AMDFAM15H_BDVER1
,
28145 static struct _arch_names_table
28147 const char *const name
;
28148 const enum processor_model model
;
28150 const arch_names_table
[] =
28153 {"intel", M_INTEL
},
28154 {"atom", M_INTEL_ATOM
},
28155 {"core2", M_INTEL_CORE2
},
28156 {"corei7", M_INTEL_COREI7
},
28157 {"nehalem", M_INTEL_COREI7_NEHALEM
},
28158 {"westmere", M_INTEL_COREI7_WESTMERE
},
28159 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
28160 {"amdfam10h", M_AMDFAM10H
},
28161 {"barcelona", M_AMDFAM10H_BARCELONA
},
28162 {"shanghai", M_AMDFAM10H_SHANGHAI
},
28163 {"istanbul", M_AMDFAM10H_ISTANBUL
},
28164 {"amdfam15h", M_AMDFAM15H
},
28165 {"bdver1", M_AMDFAM15H_BDVER1
},
28166 {"bdver2", M_AMDFAM15H_BDVER2
},
28169 static struct _isa_names_table
28171 const char *const name
;
28172 const enum processor_features feature
;
28174 const isa_names_table
[] =
28178 {"popcnt", F_POPCNT
},
28182 {"ssse3", F_SSSE3
},
28183 {"sse4.1", F_SSE4_1
},
28184 {"sse4.2", F_SSE4_2
},
28189 static tree __processor_model_type
= NULL_TREE
;
28190 static tree __cpu_model_var
= NULL_TREE
;
28192 if (__processor_model_type
== NULL_TREE
)
28193 __processor_model_type
= build_processor_model_struct ();
28195 if (__cpu_model_var
== NULL_TREE
)
28196 __cpu_model_var
= make_var_decl (__processor_model_type
,
28199 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
28201 param_string_cst
= *args
;
28202 while (param_string_cst
28203 && TREE_CODE (param_string_cst
) != STRING_CST
)
28205 /* *args must be a expr that can contain other EXPRS leading to a
28207 if (!EXPR_P (param_string_cst
))
28209 error ("Parameter to builtin must be a string constant or literal");
28210 return integer_zero_node
;
28212 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
28215 gcc_assert (param_string_cst
);
28217 if (fn_code
== IX86_BUILTIN_CPU_IS
)
28221 unsigned int field_val
= 0;
28222 unsigned int NUM_ARCH_NAMES
28223 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
28225 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
28226 if (strcmp (arch_names_table
[i
].name
,
28227 TREE_STRING_POINTER (param_string_cst
)) == 0)
28230 if (i
== NUM_ARCH_NAMES
)
28232 error ("Parameter to builtin not valid: %s",
28233 TREE_STRING_POINTER (param_string_cst
));
28234 return integer_zero_node
;
28237 field
= TYPE_FIELDS (__processor_model_type
);
28238 field_val
= arch_names_table
[i
].model
;
28240 /* CPU types are stored in the next field. */
28241 if (field_val
> M_CPU_TYPE_START
28242 && field_val
< M_CPU_SUBTYPE_START
)
28244 field
= DECL_CHAIN (field
);
28245 field_val
-= M_CPU_TYPE_START
;
28248 /* CPU subtypes are stored in the next field. */
28249 if (field_val
> M_CPU_SUBTYPE_START
)
28251 field
= DECL_CHAIN ( DECL_CHAIN (field
));
28252 field_val
-= M_CPU_SUBTYPE_START
;
28255 /* Get the appropriate field in __cpu_model. */
28256 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
28259 /* Check the value. */
28260 return build2 (EQ_EXPR
, unsigned_type_node
, ref
,
28261 build_int_cstu (unsigned_type_node
, field_val
));
28263 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
28268 unsigned int field_val
= 0;
28269 unsigned int NUM_ISA_NAMES
28270 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
28272 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
28273 if (strcmp (isa_names_table
[i
].name
,
28274 TREE_STRING_POINTER (param_string_cst
)) == 0)
28277 if (i
== NUM_ISA_NAMES
)
28279 error ("Parameter to builtin not valid: %s",
28280 TREE_STRING_POINTER (param_string_cst
));
28281 return integer_zero_node
;
28284 field
= TYPE_FIELDS (__processor_model_type
);
28285 /* Get the last field, which is __cpu_features. */
28286 while (DECL_CHAIN (field
))
28287 field
= DECL_CHAIN (field
);
28289 /* Get the appropriate field: __cpu_model.__cpu_features */
28290 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
28293 /* Access the 0th element of __cpu_features array. */
28294 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
28295 integer_zero_node
, NULL_TREE
, NULL_TREE
);
28297 field_val
= (1 << isa_names_table
[i
].feature
);
28298 /* Return __cpu_model.__cpu_features[0] & field_val */
28299 return build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
28300 build_int_cstu (unsigned_type_node
, field_val
));
28302 gcc_unreachable ();
28306 ix86_fold_builtin (tree fndecl
, int n_args
,
28307 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
28309 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
28311 enum ix86_builtins fn_code
= (enum ix86_builtins
)
28312 DECL_FUNCTION_CODE (fndecl
);
28313 if (fn_code
== IX86_BUILTIN_CPU_IS
28314 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
28316 gcc_assert (n_args
== 1);
28317 return fold_builtin_cpu (fndecl
, args
);
28321 #ifdef SUBTARGET_FOLD_BUILTIN
28322 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
28328 /* Make builtins to detect cpu type and features supported. NAME is
28329 the builtin name, CODE is the builtin code, and FTYPE is the function
28330 type of the builtin. */
28333 make_cpu_type_builtin (const char* name
, int code
,
28334 enum ix86_builtin_func_type ftype
, bool is_const
)
28339 type
= ix86_get_builtin_func_type (ftype
);
28340 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
28342 gcc_assert (decl
!= NULL_TREE
);
28343 ix86_builtins
[(int) code
] = decl
;
28344 TREE_READONLY (decl
) = is_const
;
28347 /* Make builtins to get CPU type and features supported. The created
28350 __builtin_cpu_init (), to detect cpu type and features,
28351 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
28352 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
28356 ix86_init_platform_type_builtins (void)
28358 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
28359 INT_FTYPE_VOID
, false);
28360 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
28361 INT_FTYPE_PCCHAR
, true);
28362 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
28363 INT_FTYPE_PCCHAR
, true);
28366 /* Internal method for ix86_init_builtins. */
28369 ix86_init_builtins_va_builtins_abi (void)
28371 tree ms_va_ref
, sysv_va_ref
;
28372 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
28373 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
28374 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
28375 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
28379 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
28380 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
28381 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
28383 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
28386 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28387 fnvoid_va_start_ms
=
28388 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28389 fnvoid_va_end_sysv
=
28390 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
28391 fnvoid_va_start_sysv
=
28392 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
28394 fnvoid_va_copy_ms
=
28395 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
28397 fnvoid_va_copy_sysv
=
28398 build_function_type_list (void_type_node
, sysv_va_ref
,
28399 sysv_va_ref
, NULL_TREE
);
28401 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
28402 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28403 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
28404 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28405 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
28406 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28407 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
28408 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28409 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
28410 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28411 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
28412 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28416 ix86_init_builtin_types (void)
28418 tree float128_type_node
, float80_type_node
;
28420 /* The __float80 type. */
28421 float80_type_node
= long_double_type_node
;
28422 if (TYPE_MODE (float80_type_node
) != XFmode
)
28424 /* The __float80 type. */
28425 float80_type_node
= make_node (REAL_TYPE
);
28427 TYPE_PRECISION (float80_type_node
) = 80;
28428 layout_type (float80_type_node
);
28430 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
28432 /* The __float128 type. */
28433 float128_type_node
= make_node (REAL_TYPE
);
28434 TYPE_PRECISION (float128_type_node
) = 128;
28435 layout_type (float128_type_node
);
28436 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
28438 /* This macro is built by i386-builtin-types.awk. */
28439 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
28443 ix86_init_builtins (void)
28447 ix86_init_builtin_types ();
28449 /* Builtins to get CPU type and features. */
28450 ix86_init_platform_type_builtins ();
28452 /* TFmode support builtins. */
28453 def_builtin_const (0, "__builtin_infq",
28454 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
28455 def_builtin_const (0, "__builtin_huge_valq",
28456 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
28458 /* We will expand them to normal call if SSE isn't available since
28459 they are used by libgcc. */
28460 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
28461 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
28462 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
28463 TREE_READONLY (t
) = 1;
28464 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
28466 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
28467 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
28468 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
28469 TREE_READONLY (t
) = 1;
28470 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
28472 ix86_init_tm_builtins ();
28473 ix86_init_mmx_sse_builtins ();
28476 ix86_init_builtins_va_builtins_abi ();
28478 #ifdef SUBTARGET_INIT_BUILTINS
28479 SUBTARGET_INIT_BUILTINS
;
28483 /* Return the ix86 builtin for CODE. */
28486 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
28488 if (code
>= IX86_BUILTIN_MAX
)
28489 return error_mark_node
;
28491 return ix86_builtins
[code
];
28494 /* Errors in the source file can cause expand_expr to return const0_rtx
28495 where we expect a vector. To avoid crashing, use one of the vector
28496 clear instructions. */
28498 safe_vector_operand (rtx x
, enum machine_mode mode
)
28500 if (x
== const0_rtx
)
28501 x
= CONST0_RTX (mode
);
28505 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
28508 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
28511 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28512 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28513 rtx op0
= expand_normal (arg0
);
28514 rtx op1
= expand_normal (arg1
);
28515 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28516 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28517 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
28519 if (VECTOR_MODE_P (mode0
))
28520 op0
= safe_vector_operand (op0
, mode0
);
28521 if (VECTOR_MODE_P (mode1
))
28522 op1
= safe_vector_operand (op1
, mode1
);
28524 if (optimize
|| !target
28525 || GET_MODE (target
) != tmode
28526 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28527 target
= gen_reg_rtx (tmode
);
28529 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
28531 rtx x
= gen_reg_rtx (V4SImode
);
28532 emit_insn (gen_sse2_loadd (x
, op1
));
28533 op1
= gen_lowpart (TImode
, x
);
28536 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28537 op0
= copy_to_mode_reg (mode0
, op0
);
28538 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
28539 op1
= copy_to_mode_reg (mode1
, op1
);
28541 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28550 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
28553 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
28554 enum ix86_builtin_func_type m_type
,
28555 enum rtx_code sub_code
)
28560 bool comparison_p
= false;
28562 bool last_arg_constant
= false;
28563 int num_memory
= 0;
28566 enum machine_mode mode
;
28569 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28573 case MULTI_ARG_4_DF2_DI_I
:
28574 case MULTI_ARG_4_DF2_DI_I1
:
28575 case MULTI_ARG_4_SF2_SI_I
:
28576 case MULTI_ARG_4_SF2_SI_I1
:
28578 last_arg_constant
= true;
28581 case MULTI_ARG_3_SF
:
28582 case MULTI_ARG_3_DF
:
28583 case MULTI_ARG_3_SF2
:
28584 case MULTI_ARG_3_DF2
:
28585 case MULTI_ARG_3_DI
:
28586 case MULTI_ARG_3_SI
:
28587 case MULTI_ARG_3_SI_DI
:
28588 case MULTI_ARG_3_HI
:
28589 case MULTI_ARG_3_HI_SI
:
28590 case MULTI_ARG_3_QI
:
28591 case MULTI_ARG_3_DI2
:
28592 case MULTI_ARG_3_SI2
:
28593 case MULTI_ARG_3_HI2
:
28594 case MULTI_ARG_3_QI2
:
28598 case MULTI_ARG_2_SF
:
28599 case MULTI_ARG_2_DF
:
28600 case MULTI_ARG_2_DI
:
28601 case MULTI_ARG_2_SI
:
28602 case MULTI_ARG_2_HI
:
28603 case MULTI_ARG_2_QI
:
28607 case MULTI_ARG_2_DI_IMM
:
28608 case MULTI_ARG_2_SI_IMM
:
28609 case MULTI_ARG_2_HI_IMM
:
28610 case MULTI_ARG_2_QI_IMM
:
28612 last_arg_constant
= true;
28615 case MULTI_ARG_1_SF
:
28616 case MULTI_ARG_1_DF
:
28617 case MULTI_ARG_1_SF2
:
28618 case MULTI_ARG_1_DF2
:
28619 case MULTI_ARG_1_DI
:
28620 case MULTI_ARG_1_SI
:
28621 case MULTI_ARG_1_HI
:
28622 case MULTI_ARG_1_QI
:
28623 case MULTI_ARG_1_SI_DI
:
28624 case MULTI_ARG_1_HI_DI
:
28625 case MULTI_ARG_1_HI_SI
:
28626 case MULTI_ARG_1_QI_DI
:
28627 case MULTI_ARG_1_QI_SI
:
28628 case MULTI_ARG_1_QI_HI
:
28632 case MULTI_ARG_2_DI_CMP
:
28633 case MULTI_ARG_2_SI_CMP
:
28634 case MULTI_ARG_2_HI_CMP
:
28635 case MULTI_ARG_2_QI_CMP
:
28637 comparison_p
= true;
28640 case MULTI_ARG_2_SF_TF
:
28641 case MULTI_ARG_2_DF_TF
:
28642 case MULTI_ARG_2_DI_TF
:
28643 case MULTI_ARG_2_SI_TF
:
28644 case MULTI_ARG_2_HI_TF
:
28645 case MULTI_ARG_2_QI_TF
:
28651 gcc_unreachable ();
28654 if (optimize
|| !target
28655 || GET_MODE (target
) != tmode
28656 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28657 target
= gen_reg_rtx (tmode
);
28659 gcc_assert (nargs
<= 4);
28661 for (i
= 0; i
< nargs
; i
++)
28663 tree arg
= CALL_EXPR_ARG (exp
, i
);
28664 rtx op
= expand_normal (arg
);
28665 int adjust
= (comparison_p
) ? 1 : 0;
28666 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
28668 if (last_arg_constant
&& i
== nargs
- 1)
28670 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
28672 enum insn_code new_icode
= icode
;
28675 case CODE_FOR_xop_vpermil2v2df3
:
28676 case CODE_FOR_xop_vpermil2v4sf3
:
28677 case CODE_FOR_xop_vpermil2v4df3
:
28678 case CODE_FOR_xop_vpermil2v8sf3
:
28679 error ("the last argument must be a 2-bit immediate");
28680 return gen_reg_rtx (tmode
);
28681 case CODE_FOR_xop_rotlv2di3
:
28682 new_icode
= CODE_FOR_rotlv2di3
;
28684 case CODE_FOR_xop_rotlv4si3
:
28685 new_icode
= CODE_FOR_rotlv4si3
;
28687 case CODE_FOR_xop_rotlv8hi3
:
28688 new_icode
= CODE_FOR_rotlv8hi3
;
28690 case CODE_FOR_xop_rotlv16qi3
:
28691 new_icode
= CODE_FOR_rotlv16qi3
;
28693 if (CONST_INT_P (op
))
28695 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
28696 op
= GEN_INT (INTVAL (op
) & mask
);
28697 gcc_checking_assert
28698 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
28702 gcc_checking_assert
28704 && insn_data
[new_icode
].operand
[0].mode
== tmode
28705 && insn_data
[new_icode
].operand
[1].mode
== tmode
28706 && insn_data
[new_icode
].operand
[2].mode
== mode
28707 && insn_data
[new_icode
].operand
[0].predicate
28708 == insn_data
[icode
].operand
[0].predicate
28709 && insn_data
[new_icode
].operand
[1].predicate
28710 == insn_data
[icode
].operand
[1].predicate
);
28716 gcc_unreachable ();
28723 if (VECTOR_MODE_P (mode
))
28724 op
= safe_vector_operand (op
, mode
);
28726 /* If we aren't optimizing, only allow one memory operand to be
28728 if (memory_operand (op
, mode
))
28731 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
28734 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
28736 op
= force_reg (mode
, op
);
28740 args
[i
].mode
= mode
;
28746 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
28751 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
28752 GEN_INT ((int)sub_code
));
28753 else if (! comparison_p
)
28754 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
28757 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
28761 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
28766 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
28770 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
28774 gcc_unreachable ();
28784 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28785 insns with vec_merge. */
28788 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
28792 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28793 rtx op1
, op0
= expand_normal (arg0
);
28794 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28795 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28797 if (optimize
|| !target
28798 || GET_MODE (target
) != tmode
28799 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28800 target
= gen_reg_rtx (tmode
);
28802 if (VECTOR_MODE_P (mode0
))
28803 op0
= safe_vector_operand (op0
, mode0
);
28805 if ((optimize
&& !register_operand (op0
, mode0
))
28806 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28807 op0
= copy_to_mode_reg (mode0
, op0
);
28810 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
28811 op1
= copy_to_mode_reg (mode0
, op1
);
28813 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28820 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
28823 ix86_expand_sse_compare (const struct builtin_description
*d
,
28824 tree exp
, rtx target
, bool swap
)
28827 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28828 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28829 rtx op0
= expand_normal (arg0
);
28830 rtx op1
= expand_normal (arg1
);
28832 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28833 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28834 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28835 enum rtx_code comparison
= d
->comparison
;
28837 if (VECTOR_MODE_P (mode0
))
28838 op0
= safe_vector_operand (op0
, mode0
);
28839 if (VECTOR_MODE_P (mode1
))
28840 op1
= safe_vector_operand (op1
, mode1
);
28842 /* Swap operands if we have a comparison that isn't available in
28846 rtx tmp
= gen_reg_rtx (mode1
);
28847 emit_move_insn (tmp
, op1
);
28852 if (optimize
|| !target
28853 || GET_MODE (target
) != tmode
28854 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28855 target
= gen_reg_rtx (tmode
);
28857 if ((optimize
&& !register_operand (op0
, mode0
))
28858 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
28859 op0
= copy_to_mode_reg (mode0
, op0
);
28860 if ((optimize
&& !register_operand (op1
, mode1
))
28861 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
28862 op1
= copy_to_mode_reg (mode1
, op1
);
28864 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
28865 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28872 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
28875 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
28879 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28880 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28881 rtx op0
= expand_normal (arg0
);
28882 rtx op1
= expand_normal (arg1
);
28883 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28884 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28885 enum rtx_code comparison
= d
->comparison
;
28887 if (VECTOR_MODE_P (mode0
))
28888 op0
= safe_vector_operand (op0
, mode0
);
28889 if (VECTOR_MODE_P (mode1
))
28890 op1
= safe_vector_operand (op1
, mode1
);
28892 /* Swap operands if we have a comparison that isn't available in
28894 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
28901 target
= gen_reg_rtx (SImode
);
28902 emit_move_insn (target
, const0_rtx
);
28903 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28905 if ((optimize
&& !register_operand (op0
, mode0
))
28906 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28907 op0
= copy_to_mode_reg (mode0
, op0
);
28908 if ((optimize
&& !register_operand (op1
, mode1
))
28909 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28910 op1
= copy_to_mode_reg (mode1
, op1
);
28912 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28916 emit_insn (gen_rtx_SET (VOIDmode
,
28917 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28918 gen_rtx_fmt_ee (comparison
, QImode
,
28922 return SUBREG_REG (target
);
28925 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28928 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
28932 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28933 rtx op1
, op0
= expand_normal (arg0
);
28934 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28935 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28937 if (optimize
|| target
== 0
28938 || GET_MODE (target
) != tmode
28939 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28940 target
= gen_reg_rtx (tmode
);
28942 if (VECTOR_MODE_P (mode0
))
28943 op0
= safe_vector_operand (op0
, mode0
);
28945 if ((optimize
&& !register_operand (op0
, mode0
))
28946 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28947 op0
= copy_to_mode_reg (mode0
, op0
);
28949 op1
= GEN_INT (d
->comparison
);
28951 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
28959 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
28960 tree exp
, rtx target
)
28963 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28964 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28965 rtx op0
= expand_normal (arg0
);
28966 rtx op1
= expand_normal (arg1
);
28968 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28969 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28970 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28972 if (optimize
|| target
== 0
28973 || GET_MODE (target
) != tmode
28974 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28975 target
= gen_reg_rtx (tmode
);
28977 op0
= safe_vector_operand (op0
, mode0
);
28978 op1
= safe_vector_operand (op1
, mode1
);
28980 if ((optimize
&& !register_operand (op0
, mode0
))
28981 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28982 op0
= copy_to_mode_reg (mode0
, op0
);
28983 if ((optimize
&& !register_operand (op1
, mode1
))
28984 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28985 op1
= copy_to_mode_reg (mode1
, op1
);
28987 op2
= GEN_INT (d
->comparison
);
28989 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28996 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
28999 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
29003 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29004 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29005 rtx op0
= expand_normal (arg0
);
29006 rtx op1
= expand_normal (arg1
);
29007 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
29008 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
29009 enum rtx_code comparison
= d
->comparison
;
29011 if (VECTOR_MODE_P (mode0
))
29012 op0
= safe_vector_operand (op0
, mode0
);
29013 if (VECTOR_MODE_P (mode1
))
29014 op1
= safe_vector_operand (op1
, mode1
);
29016 target
= gen_reg_rtx (SImode
);
29017 emit_move_insn (target
, const0_rtx
);
29018 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29020 if ((optimize
&& !register_operand (op0
, mode0
))
29021 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
29022 op0
= copy_to_mode_reg (mode0
, op0
);
29023 if ((optimize
&& !register_operand (op1
, mode1
))
29024 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
29025 op1
= copy_to_mode_reg (mode1
, op1
);
29027 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
29031 emit_insn (gen_rtx_SET (VOIDmode
,
29032 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29033 gen_rtx_fmt_ee (comparison
, QImode
,
29037 return SUBREG_REG (target
);
29040 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
29043 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
29044 tree exp
, rtx target
)
29047 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29048 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29049 tree arg2
= CALL_EXPR_ARG (exp
, 2);
29050 tree arg3
= CALL_EXPR_ARG (exp
, 3);
29051 tree arg4
= CALL_EXPR_ARG (exp
, 4);
29052 rtx scratch0
, scratch1
;
29053 rtx op0
= expand_normal (arg0
);
29054 rtx op1
= expand_normal (arg1
);
29055 rtx op2
= expand_normal (arg2
);
29056 rtx op3
= expand_normal (arg3
);
29057 rtx op4
= expand_normal (arg4
);
29058 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
29060 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
29061 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
29062 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
29063 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
29064 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
29065 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
29066 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
29068 if (VECTOR_MODE_P (modev2
))
29069 op0
= safe_vector_operand (op0
, modev2
);
29070 if (VECTOR_MODE_P (modev4
))
29071 op2
= safe_vector_operand (op2
, modev4
);
29073 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
29074 op0
= copy_to_mode_reg (modev2
, op0
);
29075 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
29076 op1
= copy_to_mode_reg (modei3
, op1
);
29077 if ((optimize
&& !register_operand (op2
, modev4
))
29078 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
29079 op2
= copy_to_mode_reg (modev4
, op2
);
29080 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
29081 op3
= copy_to_mode_reg (modei5
, op3
);
29083 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
29085 error ("the fifth argument must be an 8-bit immediate");
29089 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
29091 if (optimize
|| !target
29092 || GET_MODE (target
) != tmode0
29093 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
29094 target
= gen_reg_rtx (tmode0
);
29096 scratch1
= gen_reg_rtx (tmode1
);
29098 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
29100 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
29102 if (optimize
|| !target
29103 || GET_MODE (target
) != tmode1
29104 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
29105 target
= gen_reg_rtx (tmode1
);
29107 scratch0
= gen_reg_rtx (tmode0
);
29109 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
29113 gcc_assert (d
->flag
);
29115 scratch0
= gen_reg_rtx (tmode0
);
29116 scratch1
= gen_reg_rtx (tmode1
);
29118 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
29128 target
= gen_reg_rtx (SImode
);
29129 emit_move_insn (target
, const0_rtx
);
29130 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29133 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29134 gen_rtx_fmt_ee (EQ
, QImode
,
29135 gen_rtx_REG ((enum machine_mode
) d
->flag
,
29138 return SUBREG_REG (target
);
29145 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
29148 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
29149 tree exp
, rtx target
)
29152 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29153 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29154 tree arg2
= CALL_EXPR_ARG (exp
, 2);
29155 rtx scratch0
, scratch1
;
29156 rtx op0
= expand_normal (arg0
);
29157 rtx op1
= expand_normal (arg1
);
29158 rtx op2
= expand_normal (arg2
);
29159 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
29161 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
29162 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
29163 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
29164 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
29165 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
29167 if (VECTOR_MODE_P (modev2
))
29168 op0
= safe_vector_operand (op0
, modev2
);
29169 if (VECTOR_MODE_P (modev3
))
29170 op1
= safe_vector_operand (op1
, modev3
);
29172 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
29173 op0
= copy_to_mode_reg (modev2
, op0
);
29174 if ((optimize
&& !register_operand (op1
, modev3
))
29175 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
29176 op1
= copy_to_mode_reg (modev3
, op1
);
29178 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
29180 error ("the third argument must be an 8-bit immediate");
29184 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
29186 if (optimize
|| !target
29187 || GET_MODE (target
) != tmode0
29188 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
29189 target
= gen_reg_rtx (tmode0
);
29191 scratch1
= gen_reg_rtx (tmode1
);
29193 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
29195 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
29197 if (optimize
|| !target
29198 || GET_MODE (target
) != tmode1
29199 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
29200 target
= gen_reg_rtx (tmode1
);
29202 scratch0
= gen_reg_rtx (tmode0
);
29204 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
29208 gcc_assert (d
->flag
);
29210 scratch0
= gen_reg_rtx (tmode0
);
29211 scratch1
= gen_reg_rtx (tmode1
);
29213 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
29223 target
= gen_reg_rtx (SImode
);
29224 emit_move_insn (target
, const0_rtx
);
29225 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29228 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29229 gen_rtx_fmt_ee (EQ
, QImode
,
29230 gen_rtx_REG ((enum machine_mode
) d
->flag
,
29233 return SUBREG_REG (target
);
29239 /* Subroutine of ix86_expand_builtin to take care of insns with
29240 variable number of operands. */
29243 ix86_expand_args_builtin (const struct builtin_description
*d
,
29244 tree exp
, rtx target
)
29246 rtx pat
, real_target
;
29247 unsigned int i
, nargs
;
29248 unsigned int nargs_constant
= 0;
29249 int num_memory
= 0;
29253 enum machine_mode mode
;
29255 bool last_arg_count
= false;
29256 enum insn_code icode
= d
->icode
;
29257 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29258 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29259 enum machine_mode rmode
= VOIDmode
;
29261 enum rtx_code comparison
= d
->comparison
;
29263 switch ((enum ix86_builtin_func_type
) d
->flag
)
29265 case V2DF_FTYPE_V2DF_ROUND
:
29266 case V4DF_FTYPE_V4DF_ROUND
:
29267 case V4SF_FTYPE_V4SF_ROUND
:
29268 case V8SF_FTYPE_V8SF_ROUND
:
29269 case V4SI_FTYPE_V4SF_ROUND
:
29270 case V8SI_FTYPE_V8SF_ROUND
:
29271 return ix86_expand_sse_round (d
, exp
, target
);
29272 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
29273 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
29274 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
29275 case INT_FTYPE_V8SF_V8SF_PTEST
:
29276 case INT_FTYPE_V4DI_V4DI_PTEST
:
29277 case INT_FTYPE_V4DF_V4DF_PTEST
:
29278 case INT_FTYPE_V4SF_V4SF_PTEST
:
29279 case INT_FTYPE_V2DI_V2DI_PTEST
:
29280 case INT_FTYPE_V2DF_V2DF_PTEST
:
29281 return ix86_expand_sse_ptest (d
, exp
, target
);
29282 case FLOAT128_FTYPE_FLOAT128
:
29283 case FLOAT_FTYPE_FLOAT
:
29284 case INT_FTYPE_INT
:
29285 case UINT64_FTYPE_INT
:
29286 case UINT16_FTYPE_UINT16
:
29287 case INT64_FTYPE_INT64
:
29288 case INT64_FTYPE_V4SF
:
29289 case INT64_FTYPE_V2DF
:
29290 case INT_FTYPE_V16QI
:
29291 case INT_FTYPE_V8QI
:
29292 case INT_FTYPE_V8SF
:
29293 case INT_FTYPE_V4DF
:
29294 case INT_FTYPE_V4SF
:
29295 case INT_FTYPE_V2DF
:
29296 case INT_FTYPE_V32QI
:
29297 case V16QI_FTYPE_V16QI
:
29298 case V8SI_FTYPE_V8SF
:
29299 case V8SI_FTYPE_V4SI
:
29300 case V8HI_FTYPE_V8HI
:
29301 case V8HI_FTYPE_V16QI
:
29302 case V8QI_FTYPE_V8QI
:
29303 case V8SF_FTYPE_V8SF
:
29304 case V8SF_FTYPE_V8SI
:
29305 case V8SF_FTYPE_V4SF
:
29306 case V8SF_FTYPE_V8HI
:
29307 case V4SI_FTYPE_V4SI
:
29308 case V4SI_FTYPE_V16QI
:
29309 case V4SI_FTYPE_V4SF
:
29310 case V4SI_FTYPE_V8SI
:
29311 case V4SI_FTYPE_V8HI
:
29312 case V4SI_FTYPE_V4DF
:
29313 case V4SI_FTYPE_V2DF
:
29314 case V4HI_FTYPE_V4HI
:
29315 case V4DF_FTYPE_V4DF
:
29316 case V4DF_FTYPE_V4SI
:
29317 case V4DF_FTYPE_V4SF
:
29318 case V4DF_FTYPE_V2DF
:
29319 case V4SF_FTYPE_V4SF
:
29320 case V4SF_FTYPE_V4SI
:
29321 case V4SF_FTYPE_V8SF
:
29322 case V4SF_FTYPE_V4DF
:
29323 case V4SF_FTYPE_V8HI
:
29324 case V4SF_FTYPE_V2DF
:
29325 case V2DI_FTYPE_V2DI
:
29326 case V2DI_FTYPE_V16QI
:
29327 case V2DI_FTYPE_V8HI
:
29328 case V2DI_FTYPE_V4SI
:
29329 case V2DF_FTYPE_V2DF
:
29330 case V2DF_FTYPE_V4SI
:
29331 case V2DF_FTYPE_V4DF
:
29332 case V2DF_FTYPE_V4SF
:
29333 case V2DF_FTYPE_V2SI
:
29334 case V2SI_FTYPE_V2SI
:
29335 case V2SI_FTYPE_V4SF
:
29336 case V2SI_FTYPE_V2SF
:
29337 case V2SI_FTYPE_V2DF
:
29338 case V2SF_FTYPE_V2SF
:
29339 case V2SF_FTYPE_V2SI
:
29340 case V32QI_FTYPE_V32QI
:
29341 case V32QI_FTYPE_V16QI
:
29342 case V16HI_FTYPE_V16HI
:
29343 case V16HI_FTYPE_V8HI
:
29344 case V8SI_FTYPE_V8SI
:
29345 case V16HI_FTYPE_V16QI
:
29346 case V8SI_FTYPE_V16QI
:
29347 case V4DI_FTYPE_V16QI
:
29348 case V8SI_FTYPE_V8HI
:
29349 case V4DI_FTYPE_V8HI
:
29350 case V4DI_FTYPE_V4SI
:
29351 case V4DI_FTYPE_V2DI
:
29354 case V4SF_FTYPE_V4SF_VEC_MERGE
:
29355 case V2DF_FTYPE_V2DF_VEC_MERGE
:
29356 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
29357 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
29358 case V16QI_FTYPE_V16QI_V16QI
:
29359 case V16QI_FTYPE_V8HI_V8HI
:
29360 case V8QI_FTYPE_V8QI_V8QI
:
29361 case V8QI_FTYPE_V4HI_V4HI
:
29362 case V8HI_FTYPE_V8HI_V8HI
:
29363 case V8HI_FTYPE_V16QI_V16QI
:
29364 case V8HI_FTYPE_V4SI_V4SI
:
29365 case V8SF_FTYPE_V8SF_V8SF
:
29366 case V8SF_FTYPE_V8SF_V8SI
:
29367 case V4SI_FTYPE_V4SI_V4SI
:
29368 case V4SI_FTYPE_V8HI_V8HI
:
29369 case V4SI_FTYPE_V4SF_V4SF
:
29370 case V4SI_FTYPE_V2DF_V2DF
:
29371 case V4HI_FTYPE_V4HI_V4HI
:
29372 case V4HI_FTYPE_V8QI_V8QI
:
29373 case V4HI_FTYPE_V2SI_V2SI
:
29374 case V4DF_FTYPE_V4DF_V4DF
:
29375 case V4DF_FTYPE_V4DF_V4DI
:
29376 case V4SF_FTYPE_V4SF_V4SF
:
29377 case V4SF_FTYPE_V4SF_V4SI
:
29378 case V4SF_FTYPE_V4SF_V2SI
:
29379 case V4SF_FTYPE_V4SF_V2DF
:
29380 case V4SF_FTYPE_V4SF_DI
:
29381 case V4SF_FTYPE_V4SF_SI
:
29382 case V2DI_FTYPE_V2DI_V2DI
:
29383 case V2DI_FTYPE_V16QI_V16QI
:
29384 case V2DI_FTYPE_V4SI_V4SI
:
29385 case V2UDI_FTYPE_V4USI_V4USI
:
29386 case V2DI_FTYPE_V2DI_V16QI
:
29387 case V2DI_FTYPE_V2DF_V2DF
:
29388 case V2SI_FTYPE_V2SI_V2SI
:
29389 case V2SI_FTYPE_V4HI_V4HI
:
29390 case V2SI_FTYPE_V2SF_V2SF
:
29391 case V2DF_FTYPE_V2DF_V2DF
:
29392 case V2DF_FTYPE_V2DF_V4SF
:
29393 case V2DF_FTYPE_V2DF_V2DI
:
29394 case V2DF_FTYPE_V2DF_DI
:
29395 case V2DF_FTYPE_V2DF_SI
:
29396 case V2SF_FTYPE_V2SF_V2SF
:
29397 case V1DI_FTYPE_V1DI_V1DI
:
29398 case V1DI_FTYPE_V8QI_V8QI
:
29399 case V1DI_FTYPE_V2SI_V2SI
:
29400 case V32QI_FTYPE_V16HI_V16HI
:
29401 case V16HI_FTYPE_V8SI_V8SI
:
29402 case V32QI_FTYPE_V32QI_V32QI
:
29403 case V16HI_FTYPE_V32QI_V32QI
:
29404 case V16HI_FTYPE_V16HI_V16HI
:
29405 case V8SI_FTYPE_V4DF_V4DF
:
29406 case V8SI_FTYPE_V8SI_V8SI
:
29407 case V8SI_FTYPE_V16HI_V16HI
:
29408 case V4DI_FTYPE_V4DI_V4DI
:
29409 case V4DI_FTYPE_V8SI_V8SI
:
29410 case V4UDI_FTYPE_V8USI_V8USI
:
29411 if (comparison
== UNKNOWN
)
29412 return ix86_expand_binop_builtin (icode
, exp
, target
);
29415 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
29416 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
29417 gcc_assert (comparison
!= UNKNOWN
);
29421 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
29422 case V16HI_FTYPE_V16HI_SI_COUNT
:
29423 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
29424 case V8SI_FTYPE_V8SI_SI_COUNT
:
29425 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
29426 case V4DI_FTYPE_V4DI_INT_COUNT
:
29427 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
29428 case V8HI_FTYPE_V8HI_SI_COUNT
:
29429 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
29430 case V4SI_FTYPE_V4SI_SI_COUNT
:
29431 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
29432 case V4HI_FTYPE_V4HI_SI_COUNT
:
29433 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
29434 case V2DI_FTYPE_V2DI_SI_COUNT
:
29435 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
29436 case V2SI_FTYPE_V2SI_SI_COUNT
:
29437 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
29438 case V1DI_FTYPE_V1DI_SI_COUNT
:
29440 last_arg_count
= true;
29442 case UINT64_FTYPE_UINT64_UINT64
:
29443 case UINT_FTYPE_UINT_UINT
:
29444 case UINT_FTYPE_UINT_USHORT
:
29445 case UINT_FTYPE_UINT_UCHAR
:
29446 case UINT16_FTYPE_UINT16_INT
:
29447 case UINT8_FTYPE_UINT8_INT
:
29450 case V2DI_FTYPE_V2DI_INT_CONVERT
:
29453 nargs_constant
= 1;
29455 case V4DI_FTYPE_V4DI_INT_CONVERT
:
29458 nargs_constant
= 1;
29460 case V8HI_FTYPE_V8HI_INT
:
29461 case V8HI_FTYPE_V8SF_INT
:
29462 case V8HI_FTYPE_V4SF_INT
:
29463 case V8SF_FTYPE_V8SF_INT
:
29464 case V4SI_FTYPE_V4SI_INT
:
29465 case V4SI_FTYPE_V8SI_INT
:
29466 case V4HI_FTYPE_V4HI_INT
:
29467 case V4DF_FTYPE_V4DF_INT
:
29468 case V4SF_FTYPE_V4SF_INT
:
29469 case V4SF_FTYPE_V8SF_INT
:
29470 case V2DI_FTYPE_V2DI_INT
:
29471 case V2DF_FTYPE_V2DF_INT
:
29472 case V2DF_FTYPE_V4DF_INT
:
29473 case V16HI_FTYPE_V16HI_INT
:
29474 case V8SI_FTYPE_V8SI_INT
:
29475 case V4DI_FTYPE_V4DI_INT
:
29476 case V2DI_FTYPE_V4DI_INT
:
29478 nargs_constant
= 1;
29480 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
29481 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
29482 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
29483 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
29484 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
29485 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
29488 case V32QI_FTYPE_V32QI_V32QI_INT
:
29489 case V16HI_FTYPE_V16HI_V16HI_INT
:
29490 case V16QI_FTYPE_V16QI_V16QI_INT
:
29491 case V4DI_FTYPE_V4DI_V4DI_INT
:
29492 case V8HI_FTYPE_V8HI_V8HI_INT
:
29493 case V8SI_FTYPE_V8SI_V8SI_INT
:
29494 case V8SI_FTYPE_V8SI_V4SI_INT
:
29495 case V8SF_FTYPE_V8SF_V8SF_INT
:
29496 case V8SF_FTYPE_V8SF_V4SF_INT
:
29497 case V4SI_FTYPE_V4SI_V4SI_INT
:
29498 case V4DF_FTYPE_V4DF_V4DF_INT
:
29499 case V4DF_FTYPE_V4DF_V2DF_INT
:
29500 case V4SF_FTYPE_V4SF_V4SF_INT
:
29501 case V2DI_FTYPE_V2DI_V2DI_INT
:
29502 case V4DI_FTYPE_V4DI_V2DI_INT
:
29503 case V2DF_FTYPE_V2DF_V2DF_INT
:
29505 nargs_constant
= 1;
29507 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
29510 nargs_constant
= 1;
29512 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
29515 nargs_constant
= 1;
29517 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
29520 nargs_constant
= 1;
29522 case V2DI_FTYPE_V2DI_UINT_UINT
:
29524 nargs_constant
= 2;
29526 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
29527 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
29528 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
29529 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
29531 nargs_constant
= 1;
29533 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
29535 nargs_constant
= 2;
29537 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
29538 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
29542 gcc_unreachable ();
29545 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29547 if (comparison
!= UNKNOWN
)
29549 gcc_assert (nargs
== 2);
29550 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
29553 if (rmode
== VOIDmode
|| rmode
== tmode
)
29557 || GET_MODE (target
) != tmode
29558 || !insn_p
->operand
[0].predicate (target
, tmode
))
29559 target
= gen_reg_rtx (tmode
);
29560 real_target
= target
;
29564 target
= gen_reg_rtx (rmode
);
29565 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
29568 for (i
= 0; i
< nargs
; i
++)
29570 tree arg
= CALL_EXPR_ARG (exp
, i
);
29571 rtx op
= expand_normal (arg
);
29572 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29573 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29575 if (last_arg_count
&& (i
+ 1) == nargs
)
29577 /* SIMD shift insns take either an 8-bit immediate or
29578 register as count. But builtin functions take int as
29579 count. If count doesn't match, we put it in register. */
29582 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
29583 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
29584 op
= copy_to_reg (op
);
29587 else if ((nargs
- i
) <= nargs_constant
)
29592 case CODE_FOR_avx2_inserti128
:
29593 case CODE_FOR_avx2_extracti128
:
29594 error ("the last argument must be an 1-bit immediate");
29597 case CODE_FOR_sse4_1_roundsd
:
29598 case CODE_FOR_sse4_1_roundss
:
29600 case CODE_FOR_sse4_1_roundpd
:
29601 case CODE_FOR_sse4_1_roundps
:
29602 case CODE_FOR_avx_roundpd256
:
29603 case CODE_FOR_avx_roundps256
:
29605 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
29606 case CODE_FOR_sse4_1_roundps_sfix
:
29607 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
29608 case CODE_FOR_avx_roundps_sfix256
:
29610 case CODE_FOR_sse4_1_blendps
:
29611 case CODE_FOR_avx_blendpd256
:
29612 case CODE_FOR_avx_vpermilv4df
:
29613 error ("the last argument must be a 4-bit immediate");
29616 case CODE_FOR_sse4_1_blendpd
:
29617 case CODE_FOR_avx_vpermilv2df
:
29618 case CODE_FOR_xop_vpermil2v2df3
:
29619 case CODE_FOR_xop_vpermil2v4sf3
:
29620 case CODE_FOR_xop_vpermil2v4df3
:
29621 case CODE_FOR_xop_vpermil2v8sf3
:
29622 error ("the last argument must be a 2-bit immediate");
29625 case CODE_FOR_avx_vextractf128v4df
:
29626 case CODE_FOR_avx_vextractf128v8sf
:
29627 case CODE_FOR_avx_vextractf128v8si
:
29628 case CODE_FOR_avx_vinsertf128v4df
:
29629 case CODE_FOR_avx_vinsertf128v8sf
:
29630 case CODE_FOR_avx_vinsertf128v8si
:
29631 error ("the last argument must be a 1-bit immediate");
29634 case CODE_FOR_avx_vmcmpv2df3
:
29635 case CODE_FOR_avx_vmcmpv4sf3
:
29636 case CODE_FOR_avx_cmpv2df3
:
29637 case CODE_FOR_avx_cmpv4sf3
:
29638 case CODE_FOR_avx_cmpv4df3
:
29639 case CODE_FOR_avx_cmpv8sf3
:
29640 error ("the last argument must be a 5-bit immediate");
29644 switch (nargs_constant
)
29647 if ((nargs
- i
) == nargs_constant
)
29649 error ("the next to last argument must be an 8-bit immediate");
29653 error ("the last argument must be an 8-bit immediate");
29656 gcc_unreachable ();
29663 if (VECTOR_MODE_P (mode
))
29664 op
= safe_vector_operand (op
, mode
);
29666 /* If we aren't optimizing, only allow one memory operand to
29668 if (memory_operand (op
, mode
))
29671 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
29673 if (optimize
|| !match
|| num_memory
> 1)
29674 op
= copy_to_mode_reg (mode
, op
);
29678 op
= copy_to_reg (op
);
29679 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
29684 args
[i
].mode
= mode
;
29690 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
29693 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
29696 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29700 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29701 args
[2].op
, args
[3].op
);
29704 gcc_unreachable ();
29714 /* Subroutine of ix86_expand_builtin to take care of special insns
29715 with variable number of operands. */
29718 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
29719 tree exp
, rtx target
)
29723 unsigned int i
, nargs
, arg_adjust
, memory
;
29727 enum machine_mode mode
;
29729 enum insn_code icode
= d
->icode
;
29730 bool last_arg_constant
= false;
29731 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29732 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29733 enum { load
, store
} klass
;
29735 switch ((enum ix86_builtin_func_type
) d
->flag
)
29737 case VOID_FTYPE_VOID
:
29738 if (icode
== CODE_FOR_avx_vzeroupper
)
29739 target
= GEN_INT (vzeroupper_intrinsic
);
29740 emit_insn (GEN_FCN (icode
) (target
));
29742 case VOID_FTYPE_UINT64
:
29743 case VOID_FTYPE_UNSIGNED
:
29749 case INT_FTYPE_VOID
:
29750 case UINT64_FTYPE_VOID
:
29751 case UNSIGNED_FTYPE_VOID
:
29756 case UINT64_FTYPE_PUNSIGNED
:
29757 case V2DI_FTYPE_PV2DI
:
29758 case V4DI_FTYPE_PV4DI
:
29759 case V32QI_FTYPE_PCCHAR
:
29760 case V16QI_FTYPE_PCCHAR
:
29761 case V8SF_FTYPE_PCV4SF
:
29762 case V8SF_FTYPE_PCFLOAT
:
29763 case V4SF_FTYPE_PCFLOAT
:
29764 case V4DF_FTYPE_PCV2DF
:
29765 case V4DF_FTYPE_PCDOUBLE
:
29766 case V2DF_FTYPE_PCDOUBLE
:
29767 case VOID_FTYPE_PVOID
:
29772 case VOID_FTYPE_PV2SF_V4SF
:
29773 case VOID_FTYPE_PV4DI_V4DI
:
29774 case VOID_FTYPE_PV2DI_V2DI
:
29775 case VOID_FTYPE_PCHAR_V32QI
:
29776 case VOID_FTYPE_PCHAR_V16QI
:
29777 case VOID_FTYPE_PFLOAT_V8SF
:
29778 case VOID_FTYPE_PFLOAT_V4SF
:
29779 case VOID_FTYPE_PDOUBLE_V4DF
:
29780 case VOID_FTYPE_PDOUBLE_V2DF
:
29781 case VOID_FTYPE_PLONGLONG_LONGLONG
:
29782 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
29783 case VOID_FTYPE_PINT_INT
:
29786 /* Reserve memory operand for target. */
29787 memory
= ARRAY_SIZE (args
);
29789 case V4SF_FTYPE_V4SF_PCV2SF
:
29790 case V2DF_FTYPE_V2DF_PCDOUBLE
:
29795 case V8SF_FTYPE_PCV8SF_V8SI
:
29796 case V4DF_FTYPE_PCV4DF_V4DI
:
29797 case V4SF_FTYPE_PCV4SF_V4SI
:
29798 case V2DF_FTYPE_PCV2DF_V2DI
:
29799 case V8SI_FTYPE_PCV8SI_V8SI
:
29800 case V4DI_FTYPE_PCV4DI_V4DI
:
29801 case V4SI_FTYPE_PCV4SI_V4SI
:
29802 case V2DI_FTYPE_PCV2DI_V2DI
:
29807 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
29808 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
29809 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
29810 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
29811 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
29812 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
29813 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
29814 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
29817 /* Reserve memory operand for target. */
29818 memory
= ARRAY_SIZE (args
);
29820 case VOID_FTYPE_UINT_UINT_UINT
:
29821 case VOID_FTYPE_UINT64_UINT_UINT
:
29822 case UCHAR_FTYPE_UINT_UINT_UINT
:
29823 case UCHAR_FTYPE_UINT64_UINT_UINT
:
29826 memory
= ARRAY_SIZE (args
);
29827 last_arg_constant
= true;
29830 gcc_unreachable ();
29833 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29835 if (klass
== store
)
29837 arg
= CALL_EXPR_ARG (exp
, 0);
29838 op
= expand_normal (arg
);
29839 gcc_assert (target
== 0);
29842 if (GET_MODE (op
) != Pmode
)
29843 op
= convert_to_mode (Pmode
, op
, 1);
29844 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
29847 target
= force_reg (tmode
, op
);
29855 || !register_operand (target
, tmode
)
29856 || GET_MODE (target
) != tmode
)
29857 target
= gen_reg_rtx (tmode
);
29860 for (i
= 0; i
< nargs
; i
++)
29862 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29865 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
29866 op
= expand_normal (arg
);
29867 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29869 if (last_arg_constant
&& (i
+ 1) == nargs
)
29873 if (icode
== CODE_FOR_lwp_lwpvalsi3
29874 || icode
== CODE_FOR_lwp_lwpinssi3
29875 || icode
== CODE_FOR_lwp_lwpvaldi3
29876 || icode
== CODE_FOR_lwp_lwpinsdi3
)
29877 error ("the last argument must be a 32-bit immediate");
29879 error ("the last argument must be an 8-bit immediate");
29887 /* This must be the memory operand. */
29888 if (GET_MODE (op
) != Pmode
)
29889 op
= convert_to_mode (Pmode
, op
, 1);
29890 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
29891 gcc_assert (GET_MODE (op
) == mode
29892 || GET_MODE (op
) == VOIDmode
);
29896 /* This must be register. */
29897 if (VECTOR_MODE_P (mode
))
29898 op
= safe_vector_operand (op
, mode
);
29900 gcc_assert (GET_MODE (op
) == mode
29901 || GET_MODE (op
) == VOIDmode
);
29902 op
= copy_to_mode_reg (mode
, op
);
29907 args
[i
].mode
= mode
;
29913 pat
= GEN_FCN (icode
) (target
);
29916 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
29919 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
29922 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
29925 gcc_unreachable ();
29931 return klass
== store
? 0 : target
;
29934 /* Return the integer constant in ARG. Constrain it to be in the range
29935 of the subparts of VEC_TYPE; issue an error if not. */
29938 get_element_number (tree vec_type
, tree arg
)
29940 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
29942 if (!host_integerp (arg
, 1)
29943 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
29945 error ("selector must be an integer constant in the range 0..%wi", max
);
29952 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29953 ix86_expand_vector_init. We DO have language-level syntax for this, in
29954 the form of (type){ init-list }. Except that since we can't place emms
29955 instructions from inside the compiler, we can't allow the use of MMX
29956 registers unless the user explicitly asks for it. So we do *not* define
29957 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29958 we have builtins invoked by mmintrin.h that gives us license to emit
29959 these sorts of instructions. */
29962 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
29964 enum machine_mode tmode
= TYPE_MODE (type
);
29965 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
29966 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
29967 rtvec v
= rtvec_alloc (n_elt
);
29969 gcc_assert (VECTOR_MODE_P (tmode
));
29970 gcc_assert (call_expr_nargs (exp
) == n_elt
);
29972 for (i
= 0; i
< n_elt
; ++i
)
29974 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
29975 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
29978 if (!target
|| !register_operand (target
, tmode
))
29979 target
= gen_reg_rtx (tmode
);
29981 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
29985 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29986 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29987 had a language-level syntax for referencing vector elements. */
29990 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
29992 enum machine_mode tmode
, mode0
;
29997 arg0
= CALL_EXPR_ARG (exp
, 0);
29998 arg1
= CALL_EXPR_ARG (exp
, 1);
30000 op0
= expand_normal (arg0
);
30001 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
30003 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
30004 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
30005 gcc_assert (VECTOR_MODE_P (mode0
));
30007 op0
= force_reg (mode0
, op0
);
30009 if (optimize
|| !target
|| !register_operand (target
, tmode
))
30010 target
= gen_reg_rtx (tmode
);
30012 ix86_expand_vector_extract (true, target
, op0
, elt
);
30017 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30018 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
30019 a language-level syntax for referencing vector elements. */
30022 ix86_expand_vec_set_builtin (tree exp
)
30024 enum machine_mode tmode
, mode1
;
30025 tree arg0
, arg1
, arg2
;
30027 rtx op0
, op1
, target
;
30029 arg0
= CALL_EXPR_ARG (exp
, 0);
30030 arg1
= CALL_EXPR_ARG (exp
, 1);
30031 arg2
= CALL_EXPR_ARG (exp
, 2);
30033 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
30034 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
30035 gcc_assert (VECTOR_MODE_P (tmode
));
30037 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
30038 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
30039 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
30041 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
30042 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
30044 op0
= force_reg (tmode
, op0
);
30045 op1
= force_reg (mode1
, op1
);
30047 /* OP0 is the source of these builtin functions and shouldn't be
30048 modified. Create a copy, use it and return it as target. */
30049 target
= gen_reg_rtx (tmode
);
30050 emit_move_insn (target
, op0
);
30051 ix86_expand_vector_set (true, target
, op1
, elt
);
30056 /* Expand an expression EXP that calls a built-in function,
30057 with result going to TARGET if that's convenient
30058 (and in mode MODE if that's convenient).
30059 SUBTARGET may be used as the target for computing one of EXP's operands.
30060 IGNORE is nonzero if the value is to be ignored. */
30063 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
30064 enum machine_mode mode ATTRIBUTE_UNUSED
,
30065 int ignore ATTRIBUTE_UNUSED
)
30067 const struct builtin_description
*d
;
30069 enum insn_code icode
;
30070 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
30071 tree arg0
, arg1
, arg2
, arg3
, arg4
;
30072 rtx op0
, op1
, op2
, op3
, op4
, pat
;
30073 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
30074 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
30076 /* For CPU builtins that can be folded, fold first and expand the fold. */
30079 case IX86_BUILTIN_CPU_INIT
:
30081 /* Make it call __cpu_indicator_init in libgcc. */
30082 tree call_expr
, fndecl
, type
;
30083 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
30084 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
30085 call_expr
= build_call_expr (fndecl
, 0);
30086 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
30088 case IX86_BUILTIN_CPU_IS
:
30089 case IX86_BUILTIN_CPU_SUPPORTS
:
30091 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30092 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
30093 gcc_assert (fold_expr
!= NULL_TREE
);
30094 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
30098 /* Determine whether the builtin function is available under the current ISA.
30099 Originally the builtin was not created if it wasn't applicable to the
30100 current ISA based on the command line switches. With function specific
30101 options, we need to check in the context of the function making the call
30102 whether it is supported. */
30103 if (ix86_builtins_isa
[fcode
].isa
30104 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
30106 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
30107 NULL
, (enum fpmath_unit
) 0, false);
30110 error ("%qE needs unknown isa option", fndecl
);
30113 gcc_assert (opts
!= NULL
);
30114 error ("%qE needs isa option %s", fndecl
, opts
);
30122 case IX86_BUILTIN_MASKMOVQ
:
30123 case IX86_BUILTIN_MASKMOVDQU
:
30124 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
30125 ? CODE_FOR_mmx_maskmovq
30126 : CODE_FOR_sse2_maskmovdqu
);
30127 /* Note the arg order is different from the operand order. */
30128 arg1
= CALL_EXPR_ARG (exp
, 0);
30129 arg2
= CALL_EXPR_ARG (exp
, 1);
30130 arg0
= CALL_EXPR_ARG (exp
, 2);
30131 op0
= expand_normal (arg0
);
30132 op1
= expand_normal (arg1
);
30133 op2
= expand_normal (arg2
);
30134 mode0
= insn_data
[icode
].operand
[0].mode
;
30135 mode1
= insn_data
[icode
].operand
[1].mode
;
30136 mode2
= insn_data
[icode
].operand
[2].mode
;
30138 if (GET_MODE (op0
) != Pmode
)
30139 op0
= convert_to_mode (Pmode
, op0
, 1);
30140 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
30142 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
30143 op0
= copy_to_mode_reg (mode0
, op0
);
30144 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
30145 op1
= copy_to_mode_reg (mode1
, op1
);
30146 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
30147 op2
= copy_to_mode_reg (mode2
, op2
);
30148 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
30154 case IX86_BUILTIN_LDMXCSR
:
30155 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
30156 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
30157 emit_move_insn (target
, op0
);
30158 emit_insn (gen_sse_ldmxcsr (target
));
30161 case IX86_BUILTIN_STMXCSR
:
30162 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
30163 emit_insn (gen_sse_stmxcsr (target
));
30164 return copy_to_mode_reg (SImode
, target
);
30166 case IX86_BUILTIN_CLFLUSH
:
30167 arg0
= CALL_EXPR_ARG (exp
, 0);
30168 op0
= expand_normal (arg0
);
30169 icode
= CODE_FOR_sse2_clflush
;
30170 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
30172 if (GET_MODE (op0
) != Pmode
)
30173 op0
= convert_to_mode (Pmode
, op0
, 1);
30174 op0
= force_reg (Pmode
, op0
);
30177 emit_insn (gen_sse2_clflush (op0
));
30180 case IX86_BUILTIN_MONITOR
:
30181 arg0
= CALL_EXPR_ARG (exp
, 0);
30182 arg1
= CALL_EXPR_ARG (exp
, 1);
30183 arg2
= CALL_EXPR_ARG (exp
, 2);
30184 op0
= expand_normal (arg0
);
30185 op1
= expand_normal (arg1
);
30186 op2
= expand_normal (arg2
);
30189 if (GET_MODE (op0
) != Pmode
)
30190 op0
= convert_to_mode (Pmode
, op0
, 1);
30191 op0
= force_reg (Pmode
, op0
);
30194 op1
= copy_to_mode_reg (SImode
, op1
);
30196 op2
= copy_to_mode_reg (SImode
, op2
);
30197 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
30200 case IX86_BUILTIN_MWAIT
:
30201 arg0
= CALL_EXPR_ARG (exp
, 0);
30202 arg1
= CALL_EXPR_ARG (exp
, 1);
30203 op0
= expand_normal (arg0
);
30204 op1
= expand_normal (arg1
);
30206 op0
= copy_to_mode_reg (SImode
, op0
);
30208 op1
= copy_to_mode_reg (SImode
, op1
);
30209 emit_insn (gen_sse3_mwait (op0
, op1
));
30212 case IX86_BUILTIN_VEC_INIT_V2SI
:
30213 case IX86_BUILTIN_VEC_INIT_V4HI
:
30214 case IX86_BUILTIN_VEC_INIT_V8QI
:
30215 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
30217 case IX86_BUILTIN_VEC_EXT_V2DF
:
30218 case IX86_BUILTIN_VEC_EXT_V2DI
:
30219 case IX86_BUILTIN_VEC_EXT_V4SF
:
30220 case IX86_BUILTIN_VEC_EXT_V4SI
:
30221 case IX86_BUILTIN_VEC_EXT_V8HI
:
30222 case IX86_BUILTIN_VEC_EXT_V2SI
:
30223 case IX86_BUILTIN_VEC_EXT_V4HI
:
30224 case IX86_BUILTIN_VEC_EXT_V16QI
:
30225 return ix86_expand_vec_ext_builtin (exp
, target
);
30227 case IX86_BUILTIN_VEC_SET_V2DI
:
30228 case IX86_BUILTIN_VEC_SET_V4SF
:
30229 case IX86_BUILTIN_VEC_SET_V4SI
:
30230 case IX86_BUILTIN_VEC_SET_V8HI
:
30231 case IX86_BUILTIN_VEC_SET_V4HI
:
30232 case IX86_BUILTIN_VEC_SET_V16QI
:
30233 return ix86_expand_vec_set_builtin (exp
);
30235 case IX86_BUILTIN_INFQ
:
30236 case IX86_BUILTIN_HUGE_VALQ
:
30238 REAL_VALUE_TYPE inf
;
30242 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
30244 tmp
= validize_mem (force_const_mem (mode
, tmp
));
30247 target
= gen_reg_rtx (mode
);
30249 emit_move_insn (target
, tmp
);
30253 case IX86_BUILTIN_LLWPCB
:
30254 arg0
= CALL_EXPR_ARG (exp
, 0);
30255 op0
= expand_normal (arg0
);
30256 icode
= CODE_FOR_lwp_llwpcb
;
30257 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
30259 if (GET_MODE (op0
) != Pmode
)
30260 op0
= convert_to_mode (Pmode
, op0
, 1);
30261 op0
= force_reg (Pmode
, op0
);
30263 emit_insn (gen_lwp_llwpcb (op0
));
30266 case IX86_BUILTIN_SLWPCB
:
30267 icode
= CODE_FOR_lwp_slwpcb
;
30269 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
30270 target
= gen_reg_rtx (Pmode
);
30271 emit_insn (gen_lwp_slwpcb (target
));
30274 case IX86_BUILTIN_BEXTRI32
:
30275 case IX86_BUILTIN_BEXTRI64
:
30276 arg0
= CALL_EXPR_ARG (exp
, 0);
30277 arg1
= CALL_EXPR_ARG (exp
, 1);
30278 op0
= expand_normal (arg0
);
30279 op1
= expand_normal (arg1
);
30280 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
30281 ? CODE_FOR_tbm_bextri_si
30282 : CODE_FOR_tbm_bextri_di
);
30283 if (!CONST_INT_P (op1
))
30285 error ("last argument must be an immediate");
30290 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
30291 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
30292 op1
= GEN_INT (length
);
30293 op2
= GEN_INT (lsb_index
);
30294 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
30300 case IX86_BUILTIN_RDRAND16_STEP
:
30301 icode
= CODE_FOR_rdrandhi_1
;
30305 case IX86_BUILTIN_RDRAND32_STEP
:
30306 icode
= CODE_FOR_rdrandsi_1
;
30310 case IX86_BUILTIN_RDRAND64_STEP
:
30311 icode
= CODE_FOR_rdranddi_1
;
30315 op0
= gen_reg_rtx (mode0
);
30316 emit_insn (GEN_FCN (icode
) (op0
));
30318 arg0
= CALL_EXPR_ARG (exp
, 0);
30319 op1
= expand_normal (arg0
);
30320 if (!address_operand (op1
, VOIDmode
))
30322 op1
= convert_memory_address (Pmode
, op1
);
30323 op1
= copy_addr_to_reg (op1
);
30325 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
30327 op1
= gen_reg_rtx (SImode
);
30328 emit_move_insn (op1
, CONST1_RTX (SImode
));
30330 /* Emit SImode conditional move. */
30331 if (mode0
== HImode
)
30333 op2
= gen_reg_rtx (SImode
);
30334 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
30336 else if (mode0
== SImode
)
30339 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
30342 target
= gen_reg_rtx (SImode
);
30344 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
30346 emit_insn (gen_rtx_SET (VOIDmode
, target
,
30347 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
30350 case IX86_BUILTIN_RDSEED16_STEP
:
30351 icode
= CODE_FOR_rdseedhi_1
;
30355 case IX86_BUILTIN_RDSEED32_STEP
:
30356 icode
= CODE_FOR_rdseedsi_1
;
30360 case IX86_BUILTIN_RDSEED64_STEP
:
30361 icode
= CODE_FOR_rdseeddi_1
;
30365 op0
= gen_reg_rtx (mode0
);
30366 emit_insn (GEN_FCN (icode
) (op0
));
30368 arg0
= CALL_EXPR_ARG (exp
, 0);
30369 op1
= expand_normal (arg0
);
30370 if (!address_operand (op1
, VOIDmode
))
30372 op1
= convert_memory_address (Pmode
, op1
);
30373 op1
= copy_addr_to_reg (op1
);
30375 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
30377 op2
= gen_reg_rtx (QImode
);
30379 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
30381 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
30384 target
= gen_reg_rtx (SImode
);
30386 emit_insn (gen_zero_extendqisi2 (target
, op2
));
30389 case IX86_BUILTIN_ADDCARRYX32
:
30390 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
30394 case IX86_BUILTIN_ADDCARRYX64
:
30395 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
30399 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
30400 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
30401 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
30402 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
30404 op0
= gen_reg_rtx (QImode
);
30406 /* Generate CF from input operand. */
30407 op1
= expand_normal (arg0
);
30408 if (GET_MODE (op1
) != QImode
)
30409 op1
= convert_to_mode (QImode
, op1
, 1);
30410 op1
= copy_to_mode_reg (QImode
, op1
);
30411 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
30413 /* Gen ADCX instruction to compute X+Y+CF. */
30414 op2
= expand_normal (arg1
);
30415 op3
= expand_normal (arg2
);
30418 op2
= copy_to_mode_reg (mode0
, op2
);
30420 op3
= copy_to_mode_reg (mode0
, op3
);
30422 op0
= gen_reg_rtx (mode0
);
30424 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
30425 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
30426 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
30428 /* Store the result. */
30429 op4
= expand_normal (arg3
);
30430 if (!address_operand (op4
, VOIDmode
))
30432 op4
= convert_memory_address (Pmode
, op4
);
30433 op4
= copy_addr_to_reg (op4
);
30435 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
30437 /* Return current CF value. */
30439 target
= gen_reg_rtx (QImode
);
30441 PUT_MODE (pat
, QImode
);
30442 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
30445 case IX86_BUILTIN_GATHERSIV2DF
:
30446 icode
= CODE_FOR_avx2_gathersiv2df
;
30448 case IX86_BUILTIN_GATHERSIV4DF
:
30449 icode
= CODE_FOR_avx2_gathersiv4df
;
30451 case IX86_BUILTIN_GATHERDIV2DF
:
30452 icode
= CODE_FOR_avx2_gatherdiv2df
;
30454 case IX86_BUILTIN_GATHERDIV4DF
:
30455 icode
= CODE_FOR_avx2_gatherdiv4df
;
30457 case IX86_BUILTIN_GATHERSIV4SF
:
30458 icode
= CODE_FOR_avx2_gathersiv4sf
;
30460 case IX86_BUILTIN_GATHERSIV8SF
:
30461 icode
= CODE_FOR_avx2_gathersiv8sf
;
30463 case IX86_BUILTIN_GATHERDIV4SF
:
30464 icode
= CODE_FOR_avx2_gatherdiv4sf
;
30466 case IX86_BUILTIN_GATHERDIV8SF
:
30467 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30469 case IX86_BUILTIN_GATHERSIV2DI
:
30470 icode
= CODE_FOR_avx2_gathersiv2di
;
30472 case IX86_BUILTIN_GATHERSIV4DI
:
30473 icode
= CODE_FOR_avx2_gathersiv4di
;
30475 case IX86_BUILTIN_GATHERDIV2DI
:
30476 icode
= CODE_FOR_avx2_gatherdiv2di
;
30478 case IX86_BUILTIN_GATHERDIV4DI
:
30479 icode
= CODE_FOR_avx2_gatherdiv4di
;
30481 case IX86_BUILTIN_GATHERSIV4SI
:
30482 icode
= CODE_FOR_avx2_gathersiv4si
;
30484 case IX86_BUILTIN_GATHERSIV8SI
:
30485 icode
= CODE_FOR_avx2_gathersiv8si
;
30487 case IX86_BUILTIN_GATHERDIV4SI
:
30488 icode
= CODE_FOR_avx2_gatherdiv4si
;
30490 case IX86_BUILTIN_GATHERDIV8SI
:
30491 icode
= CODE_FOR_avx2_gatherdiv8si
;
30493 case IX86_BUILTIN_GATHERALTSIV4DF
:
30494 icode
= CODE_FOR_avx2_gathersiv4df
;
30496 case IX86_BUILTIN_GATHERALTDIV8SF
:
30497 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30499 case IX86_BUILTIN_GATHERALTSIV4DI
:
30500 icode
= CODE_FOR_avx2_gathersiv4di
;
30502 case IX86_BUILTIN_GATHERALTDIV8SI
:
30503 icode
= CODE_FOR_avx2_gatherdiv8si
;
30507 arg0
= CALL_EXPR_ARG (exp
, 0);
30508 arg1
= CALL_EXPR_ARG (exp
, 1);
30509 arg2
= CALL_EXPR_ARG (exp
, 2);
30510 arg3
= CALL_EXPR_ARG (exp
, 3);
30511 arg4
= CALL_EXPR_ARG (exp
, 4);
30512 op0
= expand_normal (arg0
);
30513 op1
= expand_normal (arg1
);
30514 op2
= expand_normal (arg2
);
30515 op3
= expand_normal (arg3
);
30516 op4
= expand_normal (arg4
);
30517 /* Note the arg order is different from the operand order. */
30518 mode0
= insn_data
[icode
].operand
[1].mode
;
30519 mode2
= insn_data
[icode
].operand
[3].mode
;
30520 mode3
= insn_data
[icode
].operand
[4].mode
;
30521 mode4
= insn_data
[icode
].operand
[5].mode
;
30523 if (target
== NULL_RTX
30524 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
30525 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
30527 subtarget
= target
;
30529 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
30530 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
30532 rtx half
= gen_reg_rtx (V4SImode
);
30533 if (!nonimmediate_operand (op2
, V8SImode
))
30534 op2
= copy_to_mode_reg (V8SImode
, op2
);
30535 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
30538 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
30539 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
30541 rtx (*gen
) (rtx
, rtx
);
30542 rtx half
= gen_reg_rtx (mode0
);
30543 if (mode0
== V4SFmode
)
30544 gen
= gen_vec_extract_lo_v8sf
;
30546 gen
= gen_vec_extract_lo_v8si
;
30547 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
30548 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
30549 emit_insn (gen (half
, op0
));
30551 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
30552 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
30553 emit_insn (gen (half
, op3
));
30557 /* Force memory operand only with base register here. But we
30558 don't want to do it on memory operand for other builtin
30560 if (GET_MODE (op1
) != Pmode
)
30561 op1
= convert_to_mode (Pmode
, op1
, 1);
30562 op1
= force_reg (Pmode
, op1
);
30564 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30565 op0
= copy_to_mode_reg (mode0
, op0
);
30566 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
30567 op1
= copy_to_mode_reg (Pmode
, op1
);
30568 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
30569 op2
= copy_to_mode_reg (mode2
, op2
);
30570 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
30571 op3
= copy_to_mode_reg (mode3
, op3
);
30572 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
30574 error ("last argument must be scale 1, 2, 4, 8");
30578 /* Optimize. If mask is known to have all high bits set,
30579 replace op0 with pc_rtx to signal that the instruction
30580 overwrites the whole destination and doesn't use its
30581 previous contents. */
30584 if (TREE_CODE (arg3
) == VECTOR_CST
)
30586 unsigned int negative
= 0;
30587 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
30589 tree cst
= VECTOR_CST_ELT (arg3
, i
);
30590 if (TREE_CODE (cst
) == INTEGER_CST
30591 && tree_int_cst_sign_bit (cst
))
30593 else if (TREE_CODE (cst
) == REAL_CST
30594 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
30597 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
30600 else if (TREE_CODE (arg3
) == SSA_NAME
)
30602 /* Recognize also when mask is like:
30603 __v2df src = _mm_setzero_pd ();
30604 __v2df mask = _mm_cmpeq_pd (src, src);
30606 __v8sf src = _mm256_setzero_ps ();
30607 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
30608 as that is a cheaper way to load all ones into
30609 a register than having to load a constant from
30611 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
30612 if (is_gimple_call (def_stmt
))
30614 tree fndecl
= gimple_call_fndecl (def_stmt
);
30616 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30617 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
30619 case IX86_BUILTIN_CMPPD
:
30620 case IX86_BUILTIN_CMPPS
:
30621 case IX86_BUILTIN_CMPPD256
:
30622 case IX86_BUILTIN_CMPPS256
:
30623 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
30626 case IX86_BUILTIN_CMPEQPD
:
30627 case IX86_BUILTIN_CMPEQPS
:
30628 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
30629 && initializer_zerop (gimple_call_arg (def_stmt
,
30640 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
30645 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
30646 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
30648 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
30649 ? V4SFmode
: V4SImode
;
30650 if (target
== NULL_RTX
)
30651 target
= gen_reg_rtx (tmode
);
30652 if (tmode
== V4SFmode
)
30653 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
30655 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
30658 target
= subtarget
;
30662 case IX86_BUILTIN_XABORT
:
30663 icode
= CODE_FOR_xabort
;
30664 arg0
= CALL_EXPR_ARG (exp
, 0);
30665 op0
= expand_normal (arg0
);
30666 mode0
= insn_data
[icode
].operand
[0].mode
;
30667 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
30669 error ("the xabort's argument must be an 8-bit immediate");
30672 emit_insn (gen_xabort (op0
));
30679 for (i
= 0, d
= bdesc_special_args
;
30680 i
< ARRAY_SIZE (bdesc_special_args
);
30682 if (d
->code
== fcode
)
30683 return ix86_expand_special_args_builtin (d
, exp
, target
);
30685 for (i
= 0, d
= bdesc_args
;
30686 i
< ARRAY_SIZE (bdesc_args
);
30688 if (d
->code
== fcode
)
30691 case IX86_BUILTIN_FABSQ
:
30692 case IX86_BUILTIN_COPYSIGNQ
:
30694 /* Emit a normal call if SSE isn't available. */
30695 return expand_call (exp
, target
, ignore
);
30697 return ix86_expand_args_builtin (d
, exp
, target
);
30700 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
30701 if (d
->code
== fcode
)
30702 return ix86_expand_sse_comi (d
, exp
, target
);
30704 for (i
= 0, d
= bdesc_pcmpestr
;
30705 i
< ARRAY_SIZE (bdesc_pcmpestr
);
30707 if (d
->code
== fcode
)
30708 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
30710 for (i
= 0, d
= bdesc_pcmpistr
;
30711 i
< ARRAY_SIZE (bdesc_pcmpistr
);
30713 if (d
->code
== fcode
)
30714 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
30716 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
30717 if (d
->code
== fcode
)
30718 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
30719 (enum ix86_builtin_func_type
)
30720 d
->flag
, d
->comparison
);
30722 gcc_unreachable ();
30725 /* Returns a function decl for a vectorized version of the builtin function
30726 with builtin function code FN and the result vector type TYPE, or NULL_TREE
30727 if it is not available. */
30730 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
30733 enum machine_mode in_mode
, out_mode
;
30735 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
30737 if (TREE_CODE (type_out
) != VECTOR_TYPE
30738 || TREE_CODE (type_in
) != VECTOR_TYPE
30739 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
30742 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30743 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
30744 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30745 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30749 case BUILT_IN_SQRT
:
30750 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30752 if (out_n
== 2 && in_n
== 2)
30753 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
30754 else if (out_n
== 4 && in_n
== 4)
30755 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
30759 case BUILT_IN_SQRTF
:
30760 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30762 if (out_n
== 4 && in_n
== 4)
30763 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
30764 else if (out_n
== 8 && in_n
== 8)
30765 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
30769 case BUILT_IN_IFLOOR
:
30770 case BUILT_IN_LFLOOR
:
30771 case BUILT_IN_LLFLOOR
:
30772 /* The round insn does not trap on denormals. */
30773 if (flag_trapping_math
|| !TARGET_ROUND
)
30776 if (out_mode
== SImode
&& in_mode
== DFmode
)
30778 if (out_n
== 4 && in_n
== 2)
30779 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
30780 else if (out_n
== 8 && in_n
== 4)
30781 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
30785 case BUILT_IN_IFLOORF
:
30786 case BUILT_IN_LFLOORF
:
30787 case BUILT_IN_LLFLOORF
:
30788 /* The round insn does not trap on denormals. */
30789 if (flag_trapping_math
|| !TARGET_ROUND
)
30792 if (out_mode
== SImode
&& in_mode
== SFmode
)
30794 if (out_n
== 4 && in_n
== 4)
30795 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
30796 else if (out_n
== 8 && in_n
== 8)
30797 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
30801 case BUILT_IN_ICEIL
:
30802 case BUILT_IN_LCEIL
:
30803 case BUILT_IN_LLCEIL
:
30804 /* The round insn does not trap on denormals. */
30805 if (flag_trapping_math
|| !TARGET_ROUND
)
30808 if (out_mode
== SImode
&& in_mode
== DFmode
)
30810 if (out_n
== 4 && in_n
== 2)
30811 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
30812 else if (out_n
== 8 && in_n
== 4)
30813 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
30817 case BUILT_IN_ICEILF
:
30818 case BUILT_IN_LCEILF
:
30819 case BUILT_IN_LLCEILF
:
30820 /* The round insn does not trap on denormals. */
30821 if (flag_trapping_math
|| !TARGET_ROUND
)
30824 if (out_mode
== SImode
&& in_mode
== SFmode
)
30826 if (out_n
== 4 && in_n
== 4)
30827 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
30828 else if (out_n
== 8 && in_n
== 8)
30829 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
30833 case BUILT_IN_IRINT
:
30834 case BUILT_IN_LRINT
:
30835 case BUILT_IN_LLRINT
:
30836 if (out_mode
== SImode
&& in_mode
== DFmode
)
30838 if (out_n
== 4 && in_n
== 2)
30839 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
30840 else if (out_n
== 8 && in_n
== 4)
30841 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
30845 case BUILT_IN_IRINTF
:
30846 case BUILT_IN_LRINTF
:
30847 case BUILT_IN_LLRINTF
:
30848 if (out_mode
== SImode
&& in_mode
== SFmode
)
30850 if (out_n
== 4 && in_n
== 4)
30851 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
30852 else if (out_n
== 8 && in_n
== 8)
30853 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
30857 case BUILT_IN_IROUND
:
30858 case BUILT_IN_LROUND
:
30859 case BUILT_IN_LLROUND
:
30860 /* The round insn does not trap on denormals. */
30861 if (flag_trapping_math
|| !TARGET_ROUND
)
30864 if (out_mode
== SImode
&& in_mode
== DFmode
)
30866 if (out_n
== 4 && in_n
== 2)
30867 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
30868 else if (out_n
== 8 && in_n
== 4)
30869 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
30873 case BUILT_IN_IROUNDF
:
30874 case BUILT_IN_LROUNDF
:
30875 case BUILT_IN_LLROUNDF
:
30876 /* The round insn does not trap on denormals. */
30877 if (flag_trapping_math
|| !TARGET_ROUND
)
30880 if (out_mode
== SImode
&& in_mode
== SFmode
)
30882 if (out_n
== 4 && in_n
== 4)
30883 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
30884 else if (out_n
== 8 && in_n
== 8)
30885 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
30889 case BUILT_IN_COPYSIGN
:
30890 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30892 if (out_n
== 2 && in_n
== 2)
30893 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
30894 else if (out_n
== 4 && in_n
== 4)
30895 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
30899 case BUILT_IN_COPYSIGNF
:
30900 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30902 if (out_n
== 4 && in_n
== 4)
30903 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
30904 else if (out_n
== 8 && in_n
== 8)
30905 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
30909 case BUILT_IN_FLOOR
:
30910 /* The round insn does not trap on denormals. */
30911 if (flag_trapping_math
|| !TARGET_ROUND
)
30914 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30916 if (out_n
== 2 && in_n
== 2)
30917 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
30918 else if (out_n
== 4 && in_n
== 4)
30919 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
30923 case BUILT_IN_FLOORF
:
30924 /* The round insn does not trap on denormals. */
30925 if (flag_trapping_math
|| !TARGET_ROUND
)
30928 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30930 if (out_n
== 4 && in_n
== 4)
30931 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
30932 else if (out_n
== 8 && in_n
== 8)
30933 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
30937 case BUILT_IN_CEIL
:
30938 /* The round insn does not trap on denormals. */
30939 if (flag_trapping_math
|| !TARGET_ROUND
)
30942 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30944 if (out_n
== 2 && in_n
== 2)
30945 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
30946 else if (out_n
== 4 && in_n
== 4)
30947 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
30951 case BUILT_IN_CEILF
:
30952 /* The round insn does not trap on denormals. */
30953 if (flag_trapping_math
|| !TARGET_ROUND
)
30956 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30958 if (out_n
== 4 && in_n
== 4)
30959 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
30960 else if (out_n
== 8 && in_n
== 8)
30961 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
30965 case BUILT_IN_TRUNC
:
30966 /* The round insn does not trap on denormals. */
30967 if (flag_trapping_math
|| !TARGET_ROUND
)
30970 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30972 if (out_n
== 2 && in_n
== 2)
30973 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
30974 else if (out_n
== 4 && in_n
== 4)
30975 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
30979 case BUILT_IN_TRUNCF
:
30980 /* The round insn does not trap on denormals. */
30981 if (flag_trapping_math
|| !TARGET_ROUND
)
30984 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30986 if (out_n
== 4 && in_n
== 4)
30987 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
30988 else if (out_n
== 8 && in_n
== 8)
30989 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
30993 case BUILT_IN_RINT
:
30994 /* The round insn does not trap on denormals. */
30995 if (flag_trapping_math
|| !TARGET_ROUND
)
30998 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31000 if (out_n
== 2 && in_n
== 2)
31001 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
31002 else if (out_n
== 4 && in_n
== 4)
31003 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
31007 case BUILT_IN_RINTF
:
31008 /* The round insn does not trap on denormals. */
31009 if (flag_trapping_math
|| !TARGET_ROUND
)
31012 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31014 if (out_n
== 4 && in_n
== 4)
31015 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
31016 else if (out_n
== 8 && in_n
== 8)
31017 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
31021 case BUILT_IN_ROUND
:
31022 /* The round insn does not trap on denormals. */
31023 if (flag_trapping_math
|| !TARGET_ROUND
)
31026 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31028 if (out_n
== 2 && in_n
== 2)
31029 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
31030 else if (out_n
== 4 && in_n
== 4)
31031 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
31035 case BUILT_IN_ROUNDF
:
31036 /* The round insn does not trap on denormals. */
31037 if (flag_trapping_math
|| !TARGET_ROUND
)
31040 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31042 if (out_n
== 4 && in_n
== 4)
31043 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
31044 else if (out_n
== 8 && in_n
== 8)
31045 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
31050 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31052 if (out_n
== 2 && in_n
== 2)
31053 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
31054 if (out_n
== 4 && in_n
== 4)
31055 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
31059 case BUILT_IN_FMAF
:
31060 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31062 if (out_n
== 4 && in_n
== 4)
31063 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
31064 if (out_n
== 8 && in_n
== 8)
31065 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
31073 /* Dispatch to a handler for a vectorization library. */
31074 if (ix86_veclib_handler
)
31075 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
31081 /* Handler for an SVML-style interface to
31082 a library with vectorized intrinsics. */
31085 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
31088 tree fntype
, new_fndecl
, args
;
31091 enum machine_mode el_mode
, in_mode
;
31094 /* The SVML is suitable for unsafe math only. */
31095 if (!flag_unsafe_math_optimizations
)
31098 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
31099 n
= TYPE_VECTOR_SUBPARTS (type_out
);
31100 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
31101 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
31102 if (el_mode
!= in_mode
31110 case BUILT_IN_LOG10
:
31112 case BUILT_IN_TANH
:
31114 case BUILT_IN_ATAN
:
31115 case BUILT_IN_ATAN2
:
31116 case BUILT_IN_ATANH
:
31117 case BUILT_IN_CBRT
:
31118 case BUILT_IN_SINH
:
31120 case BUILT_IN_ASINH
:
31121 case BUILT_IN_ASIN
:
31122 case BUILT_IN_COSH
:
31124 case BUILT_IN_ACOSH
:
31125 case BUILT_IN_ACOS
:
31126 if (el_mode
!= DFmode
|| n
!= 2)
31130 case BUILT_IN_EXPF
:
31131 case BUILT_IN_LOGF
:
31132 case BUILT_IN_LOG10F
:
31133 case BUILT_IN_POWF
:
31134 case BUILT_IN_TANHF
:
31135 case BUILT_IN_TANF
:
31136 case BUILT_IN_ATANF
:
31137 case BUILT_IN_ATAN2F
:
31138 case BUILT_IN_ATANHF
:
31139 case BUILT_IN_CBRTF
:
31140 case BUILT_IN_SINHF
:
31141 case BUILT_IN_SINF
:
31142 case BUILT_IN_ASINHF
:
31143 case BUILT_IN_ASINF
:
31144 case BUILT_IN_COSHF
:
31145 case BUILT_IN_COSF
:
31146 case BUILT_IN_ACOSHF
:
31147 case BUILT_IN_ACOSF
:
31148 if (el_mode
!= SFmode
|| n
!= 4)
31156 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
31158 if (fn
== BUILT_IN_LOGF
)
31159 strcpy (name
, "vmlsLn4");
31160 else if (fn
== BUILT_IN_LOG
)
31161 strcpy (name
, "vmldLn2");
31164 sprintf (name
, "vmls%s", bname
+10);
31165 name
[strlen (name
)-1] = '4';
31168 sprintf (name
, "vmld%s2", bname
+10);
31170 /* Convert to uppercase. */
31174 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
31176 args
= TREE_CHAIN (args
))
31180 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
31182 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
31184 /* Build a function declaration for the vectorized function. */
31185 new_fndecl
= build_decl (BUILTINS_LOCATION
,
31186 FUNCTION_DECL
, get_identifier (name
), fntype
);
31187 TREE_PUBLIC (new_fndecl
) = 1;
31188 DECL_EXTERNAL (new_fndecl
) = 1;
31189 DECL_IS_NOVOPS (new_fndecl
) = 1;
31190 TREE_READONLY (new_fndecl
) = 1;
31195 /* Handler for an ACML-style interface to
31196 a library with vectorized intrinsics. */
31199 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
31201 char name
[20] = "__vr.._";
31202 tree fntype
, new_fndecl
, args
;
31205 enum machine_mode el_mode
, in_mode
;
31208 /* The ACML is 64bits only and suitable for unsafe math only as
31209 it does not correctly support parts of IEEE with the required
31210 precision such as denormals. */
31212 || !flag_unsafe_math_optimizations
)
31215 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
31216 n
= TYPE_VECTOR_SUBPARTS (type_out
);
31217 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
31218 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
31219 if (el_mode
!= in_mode
31229 case BUILT_IN_LOG2
:
31230 case BUILT_IN_LOG10
:
31233 if (el_mode
!= DFmode
31238 case BUILT_IN_SINF
:
31239 case BUILT_IN_COSF
:
31240 case BUILT_IN_EXPF
:
31241 case BUILT_IN_POWF
:
31242 case BUILT_IN_LOGF
:
31243 case BUILT_IN_LOG2F
:
31244 case BUILT_IN_LOG10F
:
31247 if (el_mode
!= SFmode
31256 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
31257 sprintf (name
+ 7, "%s", bname
+10);
31260 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
31262 args
= TREE_CHAIN (args
))
31266 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
31268 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
31270 /* Build a function declaration for the vectorized function. */
31271 new_fndecl
= build_decl (BUILTINS_LOCATION
,
31272 FUNCTION_DECL
, get_identifier (name
), fntype
);
31273 TREE_PUBLIC (new_fndecl
) = 1;
31274 DECL_EXTERNAL (new_fndecl
) = 1;
31275 DECL_IS_NOVOPS (new_fndecl
) = 1;
31276 TREE_READONLY (new_fndecl
) = 1;
31281 /* Returns a decl of a function that implements gather load with
31282 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
31283 Return NULL_TREE if it is not available. */
31286 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
31287 const_tree index_type
, int scale
)
31290 enum ix86_builtins code
;
31295 if ((TREE_CODE (index_type
) != INTEGER_TYPE
31296 && !POINTER_TYPE_P (index_type
))
31297 || (TYPE_MODE (index_type
) != SImode
31298 && TYPE_MODE (index_type
) != DImode
))
31301 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
31304 /* v*gather* insn sign extends index to pointer mode. */
31305 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
31306 && TYPE_UNSIGNED (index_type
))
31311 || (scale
& (scale
- 1)) != 0)
31314 si
= TYPE_MODE (index_type
) == SImode
;
31315 switch (TYPE_MODE (mem_vectype
))
31318 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
31321 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
31324 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
31327 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
31330 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
31333 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
31336 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
31339 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
31345 return ix86_builtins
[code
];
31348 /* Returns a code for a target-specific builtin that implements
31349 reciprocal of the function, or NULL_TREE if not available. */
31352 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
31353 bool sqrt ATTRIBUTE_UNUSED
)
31355 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
31356 && flag_finite_math_only
&& !flag_trapping_math
31357 && flag_unsafe_math_optimizations
))
31361 /* Machine dependent builtins. */
31364 /* Vectorized version of sqrt to rsqrt conversion. */
31365 case IX86_BUILTIN_SQRTPS_NR
:
31366 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
31368 case IX86_BUILTIN_SQRTPS_NR256
:
31369 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
31375 /* Normal builtins. */
31378 /* Sqrt to rsqrt conversion. */
31379 case BUILT_IN_SQRTF
:
31380 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
31387 /* Helper for avx_vpermilps256_operand et al. This is also used by
31388 the expansion functions to turn the parallel back into a mask.
31389 The return value is 0 for no match and the imm8+1 for a match. */
31392 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
31394 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
31396 unsigned char ipar
[8];
31398 if (XVECLEN (par
, 0) != (int) nelt
)
31401 /* Validate that all of the elements are constants, and not totally
31402 out of range. Copy the data into an integral array to make the
31403 subsequent checks easier. */
31404 for (i
= 0; i
< nelt
; ++i
)
31406 rtx er
= XVECEXP (par
, 0, i
);
31407 unsigned HOST_WIDE_INT ei
;
31409 if (!CONST_INT_P (er
))
31420 /* In the 256-bit DFmode case, we can only move elements within
31422 for (i
= 0; i
< 2; ++i
)
31426 mask
|= ipar
[i
] << i
;
31428 for (i
= 2; i
< 4; ++i
)
31432 mask
|= (ipar
[i
] - 2) << i
;
31437 /* In the 256-bit SFmode case, we have full freedom of movement
31438 within the low 128-bit lane, but the high 128-bit lane must
31439 mirror the exact same pattern. */
31440 for (i
= 0; i
< 4; ++i
)
31441 if (ipar
[i
] + 4 != ipar
[i
+ 4])
31448 /* In the 128-bit case, we've full freedom in the placement of
31449 the elements from the source operand. */
31450 for (i
= 0; i
< nelt
; ++i
)
31451 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
31455 gcc_unreachable ();
31458 /* Make sure success has a non-zero value by adding one. */
31462 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
31463 the expansion functions to turn the parallel back into a mask.
31464 The return value is 0 for no match and the imm8+1 for a match. */
31467 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
31469 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
31471 unsigned char ipar
[8];
31473 if (XVECLEN (par
, 0) != (int) nelt
)
31476 /* Validate that all of the elements are constants, and not totally
31477 out of range. Copy the data into an integral array to make the
31478 subsequent checks easier. */
31479 for (i
= 0; i
< nelt
; ++i
)
31481 rtx er
= XVECEXP (par
, 0, i
);
31482 unsigned HOST_WIDE_INT ei
;
31484 if (!CONST_INT_P (er
))
31487 if (ei
>= 2 * nelt
)
31492 /* Validate that the halves of the permute are halves. */
31493 for (i
= 0; i
< nelt2
- 1; ++i
)
31494 if (ipar
[i
] + 1 != ipar
[i
+ 1])
31496 for (i
= nelt2
; i
< nelt
- 1; ++i
)
31497 if (ipar
[i
] + 1 != ipar
[i
+ 1])
31500 /* Reconstruct the mask. */
31501 for (i
= 0; i
< 2; ++i
)
31503 unsigned e
= ipar
[i
* nelt2
];
31507 mask
|= e
<< (i
* 4);
31510 /* Make sure success has a non-zero value by adding one. */
31514 /* Store OPERAND to the memory after reload is completed. This means
31515 that we can't easily use assign_stack_local. */
31517 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
31521 gcc_assert (reload_completed
);
31522 if (ix86_using_red_zone ())
31524 result
= gen_rtx_MEM (mode
,
31525 gen_rtx_PLUS (Pmode
,
31527 GEN_INT (-RED_ZONE_SIZE
)));
31528 emit_move_insn (result
, operand
);
31530 else if (TARGET_64BIT
)
31536 operand
= gen_lowpart (DImode
, operand
);
31540 gen_rtx_SET (VOIDmode
,
31541 gen_rtx_MEM (DImode
,
31542 gen_rtx_PRE_DEC (DImode
,
31543 stack_pointer_rtx
)),
31547 gcc_unreachable ();
31549 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
31558 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
31560 gen_rtx_SET (VOIDmode
,
31561 gen_rtx_MEM (SImode
,
31562 gen_rtx_PRE_DEC (Pmode
,
31563 stack_pointer_rtx
)),
31566 gen_rtx_SET (VOIDmode
,
31567 gen_rtx_MEM (SImode
,
31568 gen_rtx_PRE_DEC (Pmode
,
31569 stack_pointer_rtx
)),
31574 /* Store HImodes as SImodes. */
31575 operand
= gen_lowpart (SImode
, operand
);
31579 gen_rtx_SET (VOIDmode
,
31580 gen_rtx_MEM (GET_MODE (operand
),
31581 gen_rtx_PRE_DEC (SImode
,
31582 stack_pointer_rtx
)),
31586 gcc_unreachable ();
31588 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
31593 /* Free operand from the memory. */
31595 ix86_free_from_memory (enum machine_mode mode
)
31597 if (!ix86_using_red_zone ())
31601 if (mode
== DImode
|| TARGET_64BIT
)
31605 /* Use LEA to deallocate stack space. In peephole2 it will be converted
31606 to pop or add instruction if registers are available. */
31607 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
31608 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
31613 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
31615 Put float CONST_DOUBLE in the constant pool instead of fp regs.
31616 QImode must go into class Q_REGS.
31617 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
31618 movdf to do mem-to-mem moves through integer regs. */
31621 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
31623 enum machine_mode mode
= GET_MODE (x
);
31625 /* We're only allowed to return a subclass of CLASS. Many of the
31626 following checks fail for NO_REGS, so eliminate that early. */
31627 if (regclass
== NO_REGS
)
31630 /* All classes can load zeros. */
31631 if (x
== CONST0_RTX (mode
))
31634 /* Force constants into memory if we are loading a (nonzero) constant into
31635 an MMX or SSE register. This is because there are no MMX/SSE instructions
31636 to load from a constant. */
31638 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
31641 /* Prefer SSE regs only, if we can use them for math. */
31642 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
31643 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
31645 /* Floating-point constants need more complex checks. */
31646 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
31648 /* General regs can load everything. */
31649 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
31652 /* Floats can load 0 and 1 plus some others. Note that we eliminated
31653 zero above. We only want to wind up preferring 80387 registers if
31654 we plan on doing computation with them. */
31656 && standard_80387_constant_p (x
) > 0)
31658 /* Limit class to non-sse. */
31659 if (regclass
== FLOAT_SSE_REGS
)
31661 if (regclass
== FP_TOP_SSE_REGS
)
31663 if (regclass
== FP_SECOND_SSE_REGS
)
31664 return FP_SECOND_REG
;
31665 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
31672 /* Generally when we see PLUS here, it's the function invariant
31673 (plus soft-fp const_int). Which can only be computed into general
31675 if (GET_CODE (x
) == PLUS
)
31676 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
31678 /* QImode constants are easy to load, but non-constant QImode data
31679 must go into Q_REGS. */
31680 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
31682 if (reg_class_subset_p (regclass
, Q_REGS
))
31684 if (reg_class_subset_p (Q_REGS
, regclass
))
31692 /* Discourage putting floating-point values in SSE registers unless
31693 SSE math is being used, and likewise for the 387 registers. */
31695 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
31697 enum machine_mode mode
= GET_MODE (x
);
31699 /* Restrict the output reload class to the register bank that we are doing
31700 math on. If we would like not to return a subset of CLASS, reject this
31701 alternative: if reload cannot do this, it will still use its choice. */
31702 mode
= GET_MODE (x
);
31703 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
31704 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
31706 if (X87_FLOAT_MODE_P (mode
))
31708 if (regclass
== FP_TOP_SSE_REGS
)
31710 else if (regclass
== FP_SECOND_SSE_REGS
)
31711 return FP_SECOND_REG
;
31713 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
31720 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
31721 enum machine_mode mode
, secondary_reload_info
*sri
)
31723 /* Double-word spills from general registers to non-offsettable memory
31724 references (zero-extended addresses) require special handling. */
31727 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
31728 && rclass
== GENERAL_REGS
31729 && !offsettable_memref_p (x
))
31732 ? CODE_FOR_reload_noff_load
31733 : CODE_FOR_reload_noff_store
);
31734 /* Add the cost of moving address to a temporary. */
31735 sri
->extra_cost
= 1;
31740 /* QImode spills from non-QI registers require
31741 intermediate register on 32bit targets. */
31743 && !in_p
&& mode
== QImode
31744 && (rclass
== GENERAL_REGS
31745 || rclass
== LEGACY_REGS
31746 || rclass
== INDEX_REGS
))
31755 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
31756 regno
= true_regnum (x
);
31758 /* Return Q_REGS if the operand is in memory. */
31763 /* This condition handles corner case where an expression involving
31764 pointers gets vectorized. We're trying to use the address of a
31765 stack slot as a vector initializer.
31767 (set (reg:V2DI 74 [ vect_cst_.2 ])
31768 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
31770 Eventually frame gets turned into sp+offset like this:
31772 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31773 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31774 (const_int 392 [0x188]))))
31776 That later gets turned into:
31778 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31779 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31780 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
31782 We'll have the following reload recorded:
31784 Reload 0: reload_in (DI) =
31785 (plus:DI (reg/f:DI 7 sp)
31786 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
31787 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31788 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
31789 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
31790 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31791 reload_reg_rtx: (reg:V2DI 22 xmm1)
31793 Which isn't going to work since SSE instructions can't handle scalar
31794 additions. Returning GENERAL_REGS forces the addition into integer
31795 register and reload can handle subsequent reloads without problems. */
31797 if (in_p
&& GET_CODE (x
) == PLUS
31798 && SSE_CLASS_P (rclass
)
31799 && SCALAR_INT_MODE_P (mode
))
31800 return GENERAL_REGS
;
31805 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
31808 ix86_class_likely_spilled_p (reg_class_t rclass
)
31819 case SSE_FIRST_REG
:
31821 case FP_SECOND_REG
:
31831 /* If we are copying between general and FP registers, we need a memory
31832 location. The same is true for SSE and MMX registers.
31834 To optimize register_move_cost performance, allow inline variant.
31836 The macro can't work reliably when one of the CLASSES is class containing
31837 registers from multiple units (SSE, MMX, integer). We avoid this by never
31838 combining those units in single alternative in the machine description.
31839 Ensure that this constraint holds to avoid unexpected surprises.
31841 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
31842 enforce these sanity checks. */
31845 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
31846 enum machine_mode mode
, int strict
)
31848 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
31849 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
31850 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
31851 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
31852 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
31853 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
31855 gcc_assert (!strict
);
31859 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
31862 /* ??? This is a lie. We do have moves between mmx/general, and for
31863 mmx/sse2. But by saying we need secondary memory we discourage the
31864 register allocator from using the mmx registers unless needed. */
31865 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
31868 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
31870 /* SSE1 doesn't have any direct moves from other classes. */
31874 /* If the target says that inter-unit moves are more expensive
31875 than moving through memory, then don't generate them. */
31876 if (!TARGET_INTER_UNIT_MOVES
)
31879 /* Between SSE and general, we have moves no larger than word size. */
31880 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
31888 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
31889 enum machine_mode mode
, int strict
)
31891 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
31894 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31896 On the 80386, this is the size of MODE in words,
31897 except in the FP regs, where a single reg is always enough. */
31899 static unsigned char
31900 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
31902 if (MAYBE_INTEGER_CLASS_P (rclass
))
31904 if (mode
== XFmode
)
31905 return (TARGET_64BIT
? 2 : 3);
31906 else if (mode
== XCmode
)
31907 return (TARGET_64BIT
? 4 : 6);
31909 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
31913 if (COMPLEX_MODE_P (mode
))
31920 /* Return true if the registers in CLASS cannot represent the change from
31921 modes FROM to TO. */
31924 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
31925 enum reg_class regclass
)
31930 /* x87 registers can't do subreg at all, as all values are reformatted
31931 to extended precision. */
31932 if (MAYBE_FLOAT_CLASS_P (regclass
))
31935 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
31937 /* Vector registers do not support QI or HImode loads. If we don't
31938 disallow a change to these modes, reload will assume it's ok to
31939 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31940 the vec_dupv4hi pattern. */
31941 if (GET_MODE_SIZE (from
) < 4)
31944 /* Vector registers do not support subreg with nonzero offsets, which
31945 are otherwise valid for integer registers. Since we can't see
31946 whether we have a nonzero offset from here, prohibit all
31947 nonparadoxical subregs changing size. */
31948 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
31955 /* Return the cost of moving data of mode M between a
31956 register and memory. A value of 2 is the default; this cost is
31957 relative to those in `REGISTER_MOVE_COST'.
31959 This function is used extensively by register_move_cost that is used to
31960 build tables at startup. Make it inline in this case.
31961 When IN is 2, return maximum of in and out move cost.
31963 If moving between registers and memory is more expensive than
31964 between two registers, you should define this macro to express the
31967 Model also increased moving costs of QImode registers in non
31971 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
31975 if (FLOAT_CLASS_P (regclass
))
31993 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
31994 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
31996 if (SSE_CLASS_P (regclass
))
31999 switch (GET_MODE_SIZE (mode
))
32014 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
32015 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
32017 if (MMX_CLASS_P (regclass
))
32020 switch (GET_MODE_SIZE (mode
))
32032 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
32033 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
32035 switch (GET_MODE_SIZE (mode
))
32038 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
32041 return ix86_cost
->int_store
[0];
32042 if (TARGET_PARTIAL_REG_DEPENDENCY
32043 && optimize_function_for_speed_p (cfun
))
32044 cost
= ix86_cost
->movzbl_load
;
32046 cost
= ix86_cost
->int_load
[0];
32048 return MAX (cost
, ix86_cost
->int_store
[0]);
32054 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
32056 return ix86_cost
->movzbl_load
;
32058 return ix86_cost
->int_store
[0] + 4;
32063 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
32064 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
32066 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
32067 if (mode
== TFmode
)
32070 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
32072 cost
= ix86_cost
->int_load
[2];
32074 cost
= ix86_cost
->int_store
[2];
32075 return (cost
* (((int) GET_MODE_SIZE (mode
)
32076 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
32081 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
32084 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
32088 /* Return the cost of moving data from a register in class CLASS1 to
32089 one in class CLASS2.
32091 It is not required that the cost always equal 2 when FROM is the same as TO;
32092 on some machines it is expensive to move between registers if they are not
32093 general registers. */
32096 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
32097 reg_class_t class2_i
)
32099 enum reg_class class1
= (enum reg_class
) class1_i
;
32100 enum reg_class class2
= (enum reg_class
) class2_i
;
32102 /* In case we require secondary memory, compute cost of the store followed
32103 by load. In order to avoid bad register allocation choices, we need
32104 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
32106 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
32110 cost
+= inline_memory_move_cost (mode
, class1
, 2);
32111 cost
+= inline_memory_move_cost (mode
, class2
, 2);
32113 /* In case of copying from general_purpose_register we may emit multiple
32114 stores followed by single load causing memory size mismatch stall.
32115 Count this as arbitrarily high cost of 20. */
32116 if (targetm
.class_max_nregs (class1
, mode
)
32117 > targetm
.class_max_nregs (class2
, mode
))
32120 /* In the case of FP/MMX moves, the registers actually overlap, and we
32121 have to switch modes in order to treat them differently. */
32122 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
32123 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
32129 /* Moves between SSE/MMX and integer unit are expensive. */
32130 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
32131 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
32133 /* ??? By keeping returned value relatively high, we limit the number
32134 of moves between integer and MMX/SSE registers for all targets.
32135 Additionally, high value prevents problem with x86_modes_tieable_p(),
32136 where integer modes in MMX/SSE registers are not tieable
32137 because of missing QImode and HImode moves to, from or between
32138 MMX/SSE registers. */
32139 return MAX (8, ix86_cost
->mmxsse_to_integer
);
32141 if (MAYBE_FLOAT_CLASS_P (class1
))
32142 return ix86_cost
->fp_move
;
32143 if (MAYBE_SSE_CLASS_P (class1
))
32144 return ix86_cost
->sse_move
;
32145 if (MAYBE_MMX_CLASS_P (class1
))
32146 return ix86_cost
->mmx_move
;
32150 /* Return TRUE if hard register REGNO can hold a value of machine-mode
32154 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
32156 /* Flags and only flags can only hold CCmode values. */
32157 if (CC_REGNO_P (regno
))
32158 return GET_MODE_CLASS (mode
) == MODE_CC
;
32159 if (GET_MODE_CLASS (mode
) == MODE_CC
32160 || GET_MODE_CLASS (mode
) == MODE_RANDOM
32161 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
32163 if (FP_REGNO_P (regno
))
32164 return VALID_FP_MODE_P (mode
);
32165 if (SSE_REGNO_P (regno
))
32167 /* We implement the move patterns for all vector modes into and
32168 out of SSE registers, even when no operation instructions
32169 are available. OImode move is available only when AVX is
32171 return ((TARGET_AVX
&& mode
== OImode
)
32172 || VALID_AVX256_REG_MODE (mode
)
32173 || VALID_SSE_REG_MODE (mode
)
32174 || VALID_SSE2_REG_MODE (mode
)
32175 || VALID_MMX_REG_MODE (mode
)
32176 || VALID_MMX_REG_MODE_3DNOW (mode
));
32178 if (MMX_REGNO_P (regno
))
32180 /* We implement the move patterns for 3DNOW modes even in MMX mode,
32181 so if the register is available at all, then we can move data of
32182 the given mode into or out of it. */
32183 return (VALID_MMX_REG_MODE (mode
)
32184 || VALID_MMX_REG_MODE_3DNOW (mode
));
32187 if (mode
== QImode
)
32189 /* Take care for QImode values - they can be in non-QI regs,
32190 but then they do cause partial register stalls. */
32191 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
32193 if (!TARGET_PARTIAL_REG_STALL
)
32195 return !can_create_pseudo_p ();
32197 /* We handle both integer and floats in the general purpose registers. */
32198 else if (VALID_INT_MODE_P (mode
))
32200 else if (VALID_FP_MODE_P (mode
))
32202 else if (VALID_DFP_MODE_P (mode
))
32204 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
32205 on to use that value in smaller contexts, this can easily force a
32206 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
32207 supporting DImode, allow it. */
32208 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
32214 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
32215 tieable integer mode. */
32218 ix86_tieable_integer_mode_p (enum machine_mode mode
)
32227 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
32230 return TARGET_64BIT
;
32237 /* Return true if MODE1 is accessible in a register that can hold MODE2
32238 without copying. That is, all register classes that can hold MODE2
32239 can also hold MODE1. */
32242 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
32244 if (mode1
== mode2
)
32247 if (ix86_tieable_integer_mode_p (mode1
)
32248 && ix86_tieable_integer_mode_p (mode2
))
32251 /* MODE2 being XFmode implies fp stack or general regs, which means we
32252 can tie any smaller floating point modes to it. Note that we do not
32253 tie this with TFmode. */
32254 if (mode2
== XFmode
)
32255 return mode1
== SFmode
|| mode1
== DFmode
;
32257 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
32258 that we can tie it with SFmode. */
32259 if (mode2
== DFmode
)
32260 return mode1
== SFmode
;
32262 /* If MODE2 is only appropriate for an SSE register, then tie with
32263 any other mode acceptable to SSE registers. */
32264 if (GET_MODE_SIZE (mode2
) == 32
32265 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
32266 return (GET_MODE_SIZE (mode1
) == 32
32267 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
32268 if (GET_MODE_SIZE (mode2
) == 16
32269 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
32270 return (GET_MODE_SIZE (mode1
) == 16
32271 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
32273 /* If MODE2 is appropriate for an MMX register, then tie
32274 with any other mode acceptable to MMX registers. */
32275 if (GET_MODE_SIZE (mode2
) == 8
32276 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
32277 return (GET_MODE_SIZE (mode1
) == 8
32278 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
32283 /* Return the cost of moving between two registers of mode MODE. */
32286 ix86_set_reg_reg_cost (enum machine_mode mode
)
32288 unsigned int units
= UNITS_PER_WORD
;
32290 switch (GET_MODE_CLASS (mode
))
32296 units
= GET_MODE_SIZE (CCmode
);
32300 if ((TARGET_SSE
&& mode
== TFmode
)
32301 || (TARGET_80387
&& mode
== XFmode
)
32302 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
32303 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
32304 units
= GET_MODE_SIZE (mode
);
32307 case MODE_COMPLEX_FLOAT
:
32308 if ((TARGET_SSE
&& mode
== TCmode
)
32309 || (TARGET_80387
&& mode
== XCmode
)
32310 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
32311 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
32312 units
= GET_MODE_SIZE (mode
);
32315 case MODE_VECTOR_INT
:
32316 case MODE_VECTOR_FLOAT
:
32317 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
32318 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
32319 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
32320 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
32321 units
= GET_MODE_SIZE (mode
);
32324 /* Return the cost of moving between two registers of mode MODE,
32325 assuming that the move will be in pieces of at most UNITS bytes. */
32326 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
32329 /* Compute a (partial) cost for rtx X. Return true if the complete
32330 cost has been computed, and false if subexpressions should be
32331 scanned. In either case, *TOTAL contains the cost result. */
32334 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
32337 enum rtx_code code
= (enum rtx_code
) code_i
;
32338 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
32339 enum machine_mode mode
= GET_MODE (x
);
32340 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
32345 if (register_operand (SET_DEST (x
), VOIDmode
)
32346 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
32348 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
32357 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
32359 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
32361 else if (flag_pic
&& SYMBOLIC_CONST (x
)
32363 || (!GET_CODE (x
) != LABEL_REF
32364 && (GET_CODE (x
) != SYMBOL_REF
32365 || !SYMBOL_REF_LOCAL_P (x
)))))
32372 if (mode
== VOIDmode
)
32377 switch (standard_80387_constant_p (x
))
32382 default: /* Other constants */
32389 if (SSE_FLOAT_MODE_P (mode
))
32392 switch (standard_sse_constant_p (x
))
32396 case 1: /* 0: xor eliminates false dependency */
32399 default: /* -1: cmp contains false dependency */
32404 /* Fall back to (MEM (SYMBOL_REF)), since that's where
32405 it'll probably end up. Add a penalty for size. */
32406 *total
= (COSTS_N_INSNS (1)
32407 + (flag_pic
!= 0 && !TARGET_64BIT
)
32408 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
32412 /* The zero extensions is often completely free on x86_64, so make
32413 it as cheap as possible. */
32414 if (TARGET_64BIT
&& mode
== DImode
32415 && GET_MODE (XEXP (x
, 0)) == SImode
)
32417 else if (TARGET_ZERO_EXTEND_WITH_AND
)
32418 *total
= cost
->add
;
32420 *total
= cost
->movzx
;
32424 *total
= cost
->movsx
;
32428 if (SCALAR_INT_MODE_P (mode
)
32429 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
32430 && CONST_INT_P (XEXP (x
, 1)))
32432 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
32435 *total
= cost
->add
;
32438 if ((value
== 2 || value
== 3)
32439 && cost
->lea
<= cost
->shift_const
)
32441 *total
= cost
->lea
;
32451 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
32453 /* ??? Should be SSE vector operation cost. */
32454 /* At least for published AMD latencies, this really is the same
32455 as the latency for a simple fpu operation like fabs. */
32456 /* V*QImode is emulated with 1-11 insns. */
32457 if (mode
== V16QImode
|| mode
== V32QImode
)
32460 if (TARGET_XOP
&& mode
== V16QImode
)
32462 /* For XOP we use vpshab, which requires a broadcast of the
32463 value to the variable shift insn. For constants this
32464 means a V16Q const in mem; even when we can perform the
32465 shift with one insn set the cost to prefer paddb. */
32466 if (CONSTANT_P (XEXP (x
, 1)))
32468 *total
= (cost
->fabs
32469 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
32470 + (speed
? 2 : COSTS_N_BYTES (16)));
32475 else if (TARGET_SSSE3
)
32477 *total
= cost
->fabs
* count
;
32480 *total
= cost
->fabs
;
32482 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32484 if (CONST_INT_P (XEXP (x
, 1)))
32486 if (INTVAL (XEXP (x
, 1)) > 32)
32487 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
32489 *total
= cost
->shift_const
* 2;
32493 if (GET_CODE (XEXP (x
, 1)) == AND
)
32494 *total
= cost
->shift_var
* 2;
32496 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
32501 if (CONST_INT_P (XEXP (x
, 1)))
32502 *total
= cost
->shift_const
;
32504 *total
= cost
->shift_var
;
32512 gcc_assert (FLOAT_MODE_P (mode
));
32513 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
32515 /* ??? SSE scalar/vector cost should be used here. */
32516 /* ??? Bald assumption that fma has the same cost as fmul. */
32517 *total
= cost
->fmul
;
32518 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
32520 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
32522 if (GET_CODE (sub
) == NEG
)
32523 sub
= XEXP (sub
, 0);
32524 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
32527 if (GET_CODE (sub
) == NEG
)
32528 sub
= XEXP (sub
, 0);
32529 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
32534 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32536 /* ??? SSE scalar cost should be used here. */
32537 *total
= cost
->fmul
;
32540 else if (X87_FLOAT_MODE_P (mode
))
32542 *total
= cost
->fmul
;
32545 else if (FLOAT_MODE_P (mode
))
32547 /* ??? SSE vector cost should be used here. */
32548 *total
= cost
->fmul
;
32551 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
32553 /* V*QImode is emulated with 7-13 insns. */
32554 if (mode
== V16QImode
|| mode
== V32QImode
)
32557 if (TARGET_XOP
&& mode
== V16QImode
)
32559 else if (TARGET_SSSE3
)
32561 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
32563 /* V*DImode is emulated with 5-8 insns. */
32564 else if (mode
== V2DImode
|| mode
== V4DImode
)
32566 if (TARGET_XOP
&& mode
== V2DImode
)
32567 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
32569 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
32571 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
32572 insns, including two PMULUDQ. */
32573 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
32574 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
32576 *total
= cost
->fmul
;
32581 rtx op0
= XEXP (x
, 0);
32582 rtx op1
= XEXP (x
, 1);
32584 if (CONST_INT_P (XEXP (x
, 1)))
32586 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
32587 for (nbits
= 0; value
!= 0; value
&= value
- 1)
32591 /* This is arbitrary. */
32594 /* Compute costs correctly for widening multiplication. */
32595 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
32596 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
32597 == GET_MODE_SIZE (mode
))
32599 int is_mulwiden
= 0;
32600 enum machine_mode inner_mode
= GET_MODE (op0
);
32602 if (GET_CODE (op0
) == GET_CODE (op1
))
32603 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
32604 else if (CONST_INT_P (op1
))
32606 if (GET_CODE (op0
) == SIGN_EXTEND
)
32607 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
32610 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
32614 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
32617 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
32618 + nbits
* cost
->mult_bit
32619 + rtx_cost (op0
, outer_code
, opno
, speed
)
32620 + rtx_cost (op1
, outer_code
, opno
, speed
));
32629 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32630 /* ??? SSE cost should be used here. */
32631 *total
= cost
->fdiv
;
32632 else if (X87_FLOAT_MODE_P (mode
))
32633 *total
= cost
->fdiv
;
32634 else if (FLOAT_MODE_P (mode
))
32635 /* ??? SSE vector cost should be used here. */
32636 *total
= cost
->fdiv
;
32638 *total
= cost
->divide
[MODE_INDEX (mode
)];
32642 if (GET_MODE_CLASS (mode
) == MODE_INT
32643 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
32645 if (GET_CODE (XEXP (x
, 0)) == PLUS
32646 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
32647 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
32648 && CONSTANT_P (XEXP (x
, 1)))
32650 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
32651 if (val
== 2 || val
== 4 || val
== 8)
32653 *total
= cost
->lea
;
32654 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32655 outer_code
, opno
, speed
);
32656 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
32657 outer_code
, opno
, speed
);
32658 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32662 else if (GET_CODE (XEXP (x
, 0)) == MULT
32663 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
32665 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
32666 if (val
== 2 || val
== 4 || val
== 8)
32668 *total
= cost
->lea
;
32669 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32670 outer_code
, opno
, speed
);
32671 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32675 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
32677 *total
= cost
->lea
;
32678 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32679 outer_code
, opno
, speed
);
32680 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32681 outer_code
, opno
, speed
);
32682 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32689 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32691 /* ??? SSE cost should be used here. */
32692 *total
= cost
->fadd
;
32695 else if (X87_FLOAT_MODE_P (mode
))
32697 *total
= cost
->fadd
;
32700 else if (FLOAT_MODE_P (mode
))
32702 /* ??? SSE vector cost should be used here. */
32703 *total
= cost
->fadd
;
32711 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32713 *total
= (cost
->add
* 2
32714 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
32715 << (GET_MODE (XEXP (x
, 0)) != DImode
))
32716 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
32717 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
32723 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32725 /* ??? SSE cost should be used here. */
32726 *total
= cost
->fchs
;
32729 else if (X87_FLOAT_MODE_P (mode
))
32731 *total
= cost
->fchs
;
32734 else if (FLOAT_MODE_P (mode
))
32736 /* ??? SSE vector cost should be used here. */
32737 *total
= cost
->fchs
;
32743 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
32745 /* ??? Should be SSE vector operation cost. */
32746 /* At least for published AMD latencies, this really is the same
32747 as the latency for a simple fpu operation like fabs. */
32748 *total
= cost
->fabs
;
32750 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32751 *total
= cost
->add
* 2;
32753 *total
= cost
->add
;
32757 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
32758 && XEXP (XEXP (x
, 0), 1) == const1_rtx
32759 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
32760 && XEXP (x
, 1) == const0_rtx
)
32762 /* This kind of construct is implemented using test[bwl].
32763 Treat it as if we had an AND. */
32764 *total
= (cost
->add
32765 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
32766 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
32772 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
32777 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32778 /* ??? SSE cost should be used here. */
32779 *total
= cost
->fabs
;
32780 else if (X87_FLOAT_MODE_P (mode
))
32781 *total
= cost
->fabs
;
32782 else if (FLOAT_MODE_P (mode
))
32783 /* ??? SSE vector cost should be used here. */
32784 *total
= cost
->fabs
;
32788 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32789 /* ??? SSE cost should be used here. */
32790 *total
= cost
->fsqrt
;
32791 else if (X87_FLOAT_MODE_P (mode
))
32792 *total
= cost
->fsqrt
;
32793 else if (FLOAT_MODE_P (mode
))
32794 /* ??? SSE vector cost should be used here. */
32795 *total
= cost
->fsqrt
;
32799 if (XINT (x
, 1) == UNSPEC_TP
)
32806 case VEC_DUPLICATE
:
32807 /* ??? Assume all of these vector manipulation patterns are
32808 recognizable. In which case they all pretty much have the
32810 *total
= cost
->fabs
;
32820 static int current_machopic_label_num
;
32822 /* Given a symbol name and its associated stub, write out the
32823 definition of the stub. */
32826 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
32828 unsigned int length
;
32829 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
32830 int label
= ++current_machopic_label_num
;
32832 /* For 64-bit we shouldn't get here. */
32833 gcc_assert (!TARGET_64BIT
);
32835 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32836 symb
= targetm
.strip_name_encoding (symb
);
32838 length
= strlen (stub
);
32839 binder_name
= XALLOCAVEC (char, length
+ 32);
32840 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
32842 length
= strlen (symb
);
32843 symbol_name
= XALLOCAVEC (char, length
+ 32);
32844 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
32846 sprintf (lazy_ptr_name
, "L%d$lz", label
);
32848 if (MACHOPIC_ATT_STUB
)
32849 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
32850 else if (MACHOPIC_PURE
)
32851 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
32853 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
32855 fprintf (file
, "%s:\n", stub
);
32856 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32858 if (MACHOPIC_ATT_STUB
)
32860 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
32862 else if (MACHOPIC_PURE
)
32865 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32866 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
32867 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
32868 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
32869 label
, lazy_ptr_name
, label
);
32870 fprintf (file
, "\tjmp\t*%%ecx\n");
32873 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
32875 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
32876 it needs no stub-binding-helper. */
32877 if (MACHOPIC_ATT_STUB
)
32880 fprintf (file
, "%s:\n", binder_name
);
32884 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
32885 fprintf (file
, "\tpushl\t%%ecx\n");
32888 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
32890 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
32892 /* N.B. Keep the correspondence of these
32893 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
32894 old-pic/new-pic/non-pic stubs; altering this will break
32895 compatibility with existing dylibs. */
32898 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32899 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
32902 /* 16-byte -mdynamic-no-pic stub. */
32903 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
32905 fprintf (file
, "%s:\n", lazy_ptr_name
);
32906 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32907 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
32909 #endif /* TARGET_MACHO */
32911 /* Order the registers for register allocator. */
32914 x86_order_regs_for_local_alloc (void)
32919 /* First allocate the local general purpose registers. */
32920 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32921 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
32922 reg_alloc_order
[pos
++] = i
;
32924 /* Global general purpose registers. */
32925 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32926 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
32927 reg_alloc_order
[pos
++] = i
;
32929 /* x87 registers come first in case we are doing FP math
32931 if (!TARGET_SSE_MATH
)
32932 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32933 reg_alloc_order
[pos
++] = i
;
32935 /* SSE registers. */
32936 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
32937 reg_alloc_order
[pos
++] = i
;
32938 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
32939 reg_alloc_order
[pos
++] = i
;
32941 /* x87 registers. */
32942 if (TARGET_SSE_MATH
)
32943 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32944 reg_alloc_order
[pos
++] = i
;
32946 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
32947 reg_alloc_order
[pos
++] = i
;
32949 /* Initialize the rest of array as we do not allocate some registers
32951 while (pos
< FIRST_PSEUDO_REGISTER
)
32952 reg_alloc_order
[pos
++] = 0;
32955 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
32956 in struct attribute_spec handler. */
32958 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
32960 int flags ATTRIBUTE_UNUSED
,
32961 bool *no_add_attrs
)
32963 if (TREE_CODE (*node
) != FUNCTION_TYPE
32964 && TREE_CODE (*node
) != METHOD_TYPE
32965 && TREE_CODE (*node
) != FIELD_DECL
32966 && TREE_CODE (*node
) != TYPE_DECL
)
32968 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32970 *no_add_attrs
= true;
32975 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
32977 *no_add_attrs
= true;
32980 if (is_attribute_p ("callee_pop_aggregate_return", name
))
32984 cst
= TREE_VALUE (args
);
32985 if (TREE_CODE (cst
) != INTEGER_CST
)
32987 warning (OPT_Wattributes
,
32988 "%qE attribute requires an integer constant argument",
32990 *no_add_attrs
= true;
32992 else if (compare_tree_int (cst
, 0) != 0
32993 && compare_tree_int (cst
, 1) != 0)
32995 warning (OPT_Wattributes
,
32996 "argument to %qE attribute is neither zero, nor one",
32998 *no_add_attrs
= true;
33007 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
33008 struct attribute_spec.handler. */
33010 ix86_handle_abi_attribute (tree
*node
, tree name
,
33011 tree args ATTRIBUTE_UNUSED
,
33012 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
33014 if (TREE_CODE (*node
) != FUNCTION_TYPE
33015 && TREE_CODE (*node
) != METHOD_TYPE
33016 && TREE_CODE (*node
) != FIELD_DECL
33017 && TREE_CODE (*node
) != TYPE_DECL
)
33019 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
33021 *no_add_attrs
= true;
33025 /* Can combine regparm with all attributes but fastcall. */
33026 if (is_attribute_p ("ms_abi", name
))
33028 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
33030 error ("ms_abi and sysv_abi attributes are not compatible");
33035 else if (is_attribute_p ("sysv_abi", name
))
33037 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
33039 error ("ms_abi and sysv_abi attributes are not compatible");
33048 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
33049 struct attribute_spec.handler. */
33051 ix86_handle_struct_attribute (tree
*node
, tree name
,
33052 tree args ATTRIBUTE_UNUSED
,
33053 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
33056 if (DECL_P (*node
))
33058 if (TREE_CODE (*node
) == TYPE_DECL
)
33059 type
= &TREE_TYPE (*node
);
33064 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
33066 warning (OPT_Wattributes
, "%qE attribute ignored",
33068 *no_add_attrs
= true;
33071 else if ((is_attribute_p ("ms_struct", name
)
33072 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
33073 || ((is_attribute_p ("gcc_struct", name
)
33074 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
33076 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
33078 *no_add_attrs
= true;
33085 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
33086 tree args ATTRIBUTE_UNUSED
,
33087 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
33089 if (TREE_CODE (*node
) != FUNCTION_DECL
)
33091 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
33093 *no_add_attrs
= true;
33099 ix86_ms_bitfield_layout_p (const_tree record_type
)
33101 return ((TARGET_MS_BITFIELD_LAYOUT
33102 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
33103 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
33106 /* Returns an expression indicating where the this parameter is
33107 located on entry to the FUNCTION. */
33110 x86_this_parameter (tree function
)
33112 tree type
= TREE_TYPE (function
);
33113 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
33118 const int *parm_regs
;
33120 if (ix86_function_type_abi (type
) == MS_ABI
)
33121 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
33123 parm_regs
= x86_64_int_parameter_registers
;
33124 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
33127 nregs
= ix86_function_regparm (type
, function
);
33129 if (nregs
> 0 && !stdarg_p (type
))
33132 unsigned int ccvt
= ix86_get_callcvt (type
);
33134 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
33135 regno
= aggr
? DX_REG
: CX_REG
;
33136 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
33140 return gen_rtx_MEM (SImode
,
33141 plus_constant (Pmode
, stack_pointer_rtx
, 4));
33150 return gen_rtx_MEM (SImode
,
33151 plus_constant (Pmode
,
33152 stack_pointer_rtx
, 4));
33155 return gen_rtx_REG (SImode
, regno
);
33158 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
33162 /* Determine whether x86_output_mi_thunk can succeed. */
33165 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
33166 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
33167 HOST_WIDE_INT vcall_offset
, const_tree function
)
33169 /* 64-bit can handle anything. */
33173 /* For 32-bit, everything's fine if we have one free register. */
33174 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
33177 /* Need a free register for vcall_offset. */
33181 /* Need a free register for GOT references. */
33182 if (flag_pic
&& !targetm
.binds_local_p (function
))
33185 /* Otherwise ok. */
33189 /* Output the assembler code for a thunk function. THUNK_DECL is the
33190 declaration for the thunk function itself, FUNCTION is the decl for
33191 the target function. DELTA is an immediate constant offset to be
33192 added to THIS. If VCALL_OFFSET is nonzero, the word at
33193 *(*this + vcall_offset) should be added to THIS. */
33196 x86_output_mi_thunk (FILE *file
,
33197 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
33198 HOST_WIDE_INT vcall_offset
, tree function
)
33200 rtx this_param
= x86_this_parameter (function
);
33201 rtx this_reg
, tmp
, fnaddr
;
33202 unsigned int tmp_regno
;
33205 tmp_regno
= R10_REG
;
33208 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
33209 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
33210 tmp_regno
= AX_REG
;
33212 tmp_regno
= CX_REG
;
33215 emit_note (NOTE_INSN_PROLOGUE_END
);
33217 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
33218 pull it in now and let DELTA benefit. */
33219 if (REG_P (this_param
))
33220 this_reg
= this_param
;
33221 else if (vcall_offset
)
33223 /* Put the this parameter into %eax. */
33224 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
33225 emit_move_insn (this_reg
, this_param
);
33228 this_reg
= NULL_RTX
;
33230 /* Adjust the this parameter by a fixed constant. */
33233 rtx delta_rtx
= GEN_INT (delta
);
33234 rtx delta_dst
= this_reg
? this_reg
: this_param
;
33238 if (!x86_64_general_operand (delta_rtx
, Pmode
))
33240 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
33241 emit_move_insn (tmp
, delta_rtx
);
33246 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
33249 /* Adjust the this parameter by a value stored in the vtable. */
33252 rtx vcall_addr
, vcall_mem
, this_mem
;
33254 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
33256 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
33257 if (Pmode
!= ptr_mode
)
33258 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
33259 emit_move_insn (tmp
, this_mem
);
33261 /* Adjust the this parameter. */
33262 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
33264 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
33266 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
33267 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
33268 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
33271 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
33272 if (Pmode
!= ptr_mode
)
33273 emit_insn (gen_addsi_1_zext (this_reg
,
33274 gen_rtx_REG (ptr_mode
,
33278 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
33281 /* If necessary, drop THIS back to its stack slot. */
33282 if (this_reg
&& this_reg
!= this_param
)
33283 emit_move_insn (this_param
, this_reg
);
33285 fnaddr
= XEXP (DECL_RTL (function
), 0);
33288 if (!flag_pic
|| targetm
.binds_local_p (function
)
33289 || cfun
->machine
->call_abi
== MS_ABI
)
33293 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
33294 tmp
= gen_rtx_CONST (Pmode
, tmp
);
33295 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
33300 if (!flag_pic
|| targetm
.binds_local_p (function
))
33303 else if (TARGET_MACHO
)
33305 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
33306 fnaddr
= XEXP (fnaddr
, 0);
33308 #endif /* TARGET_MACHO */
33311 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
33312 output_set_got (tmp
, NULL_RTX
);
33314 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
33315 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
33316 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
33320 /* Our sibling call patterns do not allow memories, because we have no
33321 predicate that can distinguish between frame and non-frame memory.
33322 For our purposes here, we can get away with (ab)using a jump pattern,
33323 because we're going to do no optimization. */
33324 if (MEM_P (fnaddr
))
33325 emit_jump_insn (gen_indirect_jump (fnaddr
));
33328 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
33329 fnaddr
= legitimize_pic_address (fnaddr
,
33330 gen_rtx_REG (Pmode
, tmp_regno
));
33332 if (!sibcall_insn_operand (fnaddr
, word_mode
))
33334 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
33335 if (GET_MODE (fnaddr
) != word_mode
)
33336 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
33337 emit_move_insn (tmp
, fnaddr
);
33341 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
33342 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
33343 tmp
= emit_call_insn (tmp
);
33344 SIBLING_CALL_P (tmp
) = 1;
33348 /* Emit just enough of rest_of_compilation to get the insns emitted.
33349 Note that use_thunk calls assemble_start_function et al. */
33350 tmp
= get_insns ();
33351 insn_locators_alloc ();
33352 shorten_branches (tmp
);
33353 final_start_function (tmp
, file
, 1);
33354 final (tmp
, file
, 1);
33355 final_end_function ();
33359 x86_file_start (void)
33361 default_file_start ();
33363 darwin_file_start ();
33365 if (X86_FILE_START_VERSION_DIRECTIVE
)
33366 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
33367 if (X86_FILE_START_FLTUSED
)
33368 fputs ("\t.global\t__fltused\n", asm_out_file
);
33369 if (ix86_asm_dialect
== ASM_INTEL
)
33370 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
33374 x86_field_alignment (tree field
, int computed
)
33376 enum machine_mode mode
;
33377 tree type
= TREE_TYPE (field
);
33379 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
33381 mode
= TYPE_MODE (strip_array_types (type
));
33382 if (mode
== DFmode
|| mode
== DCmode
33383 || GET_MODE_CLASS (mode
) == MODE_INT
33384 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
33385 return MIN (32, computed
);
33389 /* Output assembler code to FILE to increment profiler label # LABELNO
33390 for profiling a function entry. */
33392 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
33394 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
33399 #ifndef NO_PROFILE_COUNTERS
33400 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
33403 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
33404 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
33406 fprintf (file
, "\tcall\t%s\n", mcount_name
);
33410 #ifndef NO_PROFILE_COUNTERS
33411 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
33414 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
33418 #ifndef NO_PROFILE_COUNTERS
33419 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
33422 fprintf (file
, "\tcall\t%s\n", mcount_name
);
33426 /* We don't have exact information about the insn sizes, but we may assume
33427 quite safely that we are informed about all 1 byte insns and memory
33428 address sizes. This is enough to eliminate unnecessary padding in
33432 min_insn_size (rtx insn
)
33436 if (!INSN_P (insn
) || !active_insn_p (insn
))
33439 /* Discard alignments we've emit and jump instructions. */
33440 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
33441 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
33443 if (JUMP_TABLE_DATA_P (insn
))
33446 /* Important case - calls are always 5 bytes.
33447 It is common to have many calls in the row. */
33449 && symbolic_reference_mentioned_p (PATTERN (insn
))
33450 && !SIBLING_CALL_P (insn
))
33452 len
= get_attr_length (insn
);
33456 /* For normal instructions we rely on get_attr_length being exact,
33457 with a few exceptions. */
33458 if (!JUMP_P (insn
))
33460 enum attr_type type
= get_attr_type (insn
);
33465 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
33466 || asm_noperands (PATTERN (insn
)) >= 0)
33473 /* Otherwise trust get_attr_length. */
33477 l
= get_attr_length_address (insn
);
33478 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
33487 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
33489 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
33493 ix86_avoid_jump_mispredicts (void)
33495 rtx insn
, start
= get_insns ();
33496 int nbytes
= 0, njumps
= 0;
33499 /* Look for all minimal intervals of instructions containing 4 jumps.
33500 The intervals are bounded by START and INSN. NBYTES is the total
33501 size of instructions in the interval including INSN and not including
33502 START. When the NBYTES is smaller than 16 bytes, it is possible
33503 that the end of START and INSN ends up in the same 16byte page.
33505 The smallest offset in the page INSN can start is the case where START
33506 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
33507 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
33509 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
33513 if (LABEL_P (insn
))
33515 int align
= label_to_alignment (insn
);
33516 int max_skip
= label_to_max_skip (insn
);
33520 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
33521 already in the current 16 byte page, because otherwise
33522 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
33523 bytes to reach 16 byte boundary. */
33525 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
33528 fprintf (dump_file
, "Label %i with max_skip %i\n",
33529 INSN_UID (insn
), max_skip
);
33532 while (nbytes
+ max_skip
>= 16)
33534 start
= NEXT_INSN (start
);
33535 if ((JUMP_P (start
)
33536 && GET_CODE (PATTERN (start
)) != ADDR_VEC
33537 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
33539 njumps
--, isjump
= 1;
33542 nbytes
-= min_insn_size (start
);
33548 min_size
= min_insn_size (insn
);
33549 nbytes
+= min_size
;
33551 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
33552 INSN_UID (insn
), min_size
);
33554 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
33555 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
33563 start
= NEXT_INSN (start
);
33564 if ((JUMP_P (start
)
33565 && GET_CODE (PATTERN (start
)) != ADDR_VEC
33566 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
33568 njumps
--, isjump
= 1;
33571 nbytes
-= min_insn_size (start
);
33573 gcc_assert (njumps
>= 0);
33575 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
33576 INSN_UID (start
), INSN_UID (insn
), nbytes
);
33578 if (njumps
== 3 && isjump
&& nbytes
< 16)
33580 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
33583 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
33584 INSN_UID (insn
), padsize
);
33585 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
33591 /* AMD Athlon works faster
33592 when RET is not destination of conditional jump or directly preceded
33593 by other jump instruction. We avoid the penalty by inserting NOP just
33594 before the RET instructions in such cases. */
33596 ix86_pad_returns (void)
33601 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33603 basic_block bb
= e
->src
;
33604 rtx ret
= BB_END (bb
);
33606 bool replace
= false;
33608 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
33609 || optimize_bb_for_size_p (bb
))
33611 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
33612 if (active_insn_p (prev
) || LABEL_P (prev
))
33614 if (prev
&& LABEL_P (prev
))
33619 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33620 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
33621 && !(e
->flags
& EDGE_FALLTHRU
))
33626 prev
= prev_active_insn (ret
);
33628 && ((JUMP_P (prev
) && any_condjump_p (prev
))
33631 /* Empty functions get branch mispredict even when
33632 the jump destination is not visible to us. */
33633 if (!prev
&& !optimize_function_for_size_p (cfun
))
33638 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
33644 /* Count the minimum number of instructions in BB. Return 4 if the
33645 number of instructions >= 4. */
33648 ix86_count_insn_bb (basic_block bb
)
33651 int insn_count
= 0;
33653 /* Count number of instructions in this block. Return 4 if the number
33654 of instructions >= 4. */
33655 FOR_BB_INSNS (bb
, insn
)
33657 /* Only happen in exit blocks. */
33659 && ANY_RETURN_P (PATTERN (insn
)))
33662 if (NONDEBUG_INSN_P (insn
)
33663 && GET_CODE (PATTERN (insn
)) != USE
33664 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
33667 if (insn_count
>= 4)
33676 /* Count the minimum number of instructions in code path in BB.
33677 Return 4 if the number of instructions >= 4. */
33680 ix86_count_insn (basic_block bb
)
33684 int min_prev_count
;
33686 /* Only bother counting instructions along paths with no
33687 more than 2 basic blocks between entry and exit. Given
33688 that BB has an edge to exit, determine if a predecessor
33689 of BB has an edge from entry. If so, compute the number
33690 of instructions in the predecessor block. If there
33691 happen to be multiple such blocks, compute the minimum. */
33692 min_prev_count
= 4;
33693 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33696 edge_iterator prev_ei
;
33698 if (e
->src
== ENTRY_BLOCK_PTR
)
33700 min_prev_count
= 0;
33703 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
33705 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
33707 int count
= ix86_count_insn_bb (e
->src
);
33708 if (count
< min_prev_count
)
33709 min_prev_count
= count
;
33715 if (min_prev_count
< 4)
33716 min_prev_count
+= ix86_count_insn_bb (bb
);
33718 return min_prev_count
;
33721 /* Pad short function to 4 instructions. */
33724 ix86_pad_short_function (void)
33729 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33731 rtx ret
= BB_END (e
->src
);
33732 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
33734 int insn_count
= ix86_count_insn (e
->src
);
33736 /* Pad short function. */
33737 if (insn_count
< 4)
33741 /* Find epilogue. */
33744 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
33745 insn
= PREV_INSN (insn
);
33750 /* Two NOPs count as one instruction. */
33751 insn_count
= 2 * (4 - insn_count
);
33752 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
33758 /* Implement machine specific optimizations. We implement padding of returns
33759 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
33763 /* We are freeing block_for_insn in the toplev to keep compatibility
33764 with old MDEP_REORGS that are not CFG based. Recompute it now. */
33765 compute_bb_for_insn ();
33767 /* Run the vzeroupper optimization if needed. */
33768 if (TARGET_VZEROUPPER
)
33769 move_or_delete_vzeroupper ();
33771 if (optimize
&& optimize_function_for_speed_p (cfun
))
33773 if (TARGET_PAD_SHORT_FUNCTION
)
33774 ix86_pad_short_function ();
33775 else if (TARGET_PAD_RETURNS
)
33776 ix86_pad_returns ();
33777 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
33778 if (TARGET_FOUR_JUMP_LIMIT
)
33779 ix86_avoid_jump_mispredicts ();
33784 /* Return nonzero when QImode register that must be represented via REX prefix
33787 x86_extended_QIreg_mentioned_p (rtx insn
)
33790 extract_insn_cached (insn
);
33791 for (i
= 0; i
< recog_data
.n_operands
; i
++)
33792 if (GENERAL_REG_P (recog_data
.operand
[i
])
33793 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
33798 /* Return nonzero when P points to register encoded via REX prefix.
33799 Called via for_each_rtx. */
33801 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
33803 unsigned int regno
;
33806 regno
= REGNO (*p
);
33807 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
33810 /* Return true when INSN mentions register that must be encoded using REX
33813 x86_extended_reg_mentioned_p (rtx insn
)
33815 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
33816 extended_reg_mentioned_1
, NULL
);
33819 /* If profitable, negate (without causing overflow) integer constant
33820 of mode MODE at location LOC. Return true in this case. */
33822 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
33826 if (!CONST_INT_P (*loc
))
33832 /* DImode x86_64 constants must fit in 32 bits. */
33833 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
33844 gcc_unreachable ();
33847 /* Avoid overflows. */
33848 if (mode_signbit_p (mode
, *loc
))
33851 val
= INTVAL (*loc
);
33853 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
33854 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
33855 if ((val
< 0 && val
!= -128)
33858 *loc
= GEN_INT (-val
);
33865 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
33866 optabs would emit if we didn't have TFmode patterns. */
33869 x86_emit_floatuns (rtx operands
[2])
33871 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
33872 enum machine_mode mode
, inmode
;
33874 inmode
= GET_MODE (operands
[1]);
33875 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
33878 in
= force_reg (inmode
, operands
[1]);
33879 mode
= GET_MODE (out
);
33880 neglab
= gen_label_rtx ();
33881 donelab
= gen_label_rtx ();
33882 f0
= gen_reg_rtx (mode
);
33884 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
33886 expand_float (out
, in
, 0);
33888 emit_jump_insn (gen_jump (donelab
));
33891 emit_label (neglab
);
33893 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
33895 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
33897 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
33899 expand_float (f0
, i0
, 0);
33901 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
33903 emit_label (donelab
);
33906 /* AVX2 does support 32-byte integer vector operations,
33907 thus the longest vector we are faced with is V32QImode. */
33908 #define MAX_VECT_LEN 32
33910 struct expand_vec_perm_d
33912 rtx target
, op0
, op1
;
33913 unsigned char perm
[MAX_VECT_LEN
];
33914 enum machine_mode vmode
;
33915 unsigned char nelt
;
33916 bool one_operand_p
;
33920 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
33921 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
33922 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
33924 /* Get a vector mode of the same size as the original but with elements
33925 twice as wide. This is only guaranteed to apply to integral vectors. */
33927 static inline enum machine_mode
33928 get_mode_wider_vector (enum machine_mode o
)
33930 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
33931 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
33932 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
33933 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
33937 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33938 with all elements equal to VAR. Return true if successful. */
33941 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
33942 rtx target
, rtx val
)
33965 /* First attempt to recognize VAL as-is. */
33966 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
33967 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
33968 if (recog_memoized (insn
) < 0)
33971 /* If that fails, force VAL into a register. */
33974 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
33975 seq
= get_insns ();
33978 emit_insn_before (seq
, insn
);
33980 ok
= recog_memoized (insn
) >= 0;
33989 if (TARGET_SSE
|| TARGET_3DNOW_A
)
33993 val
= gen_lowpart (SImode
, val
);
33994 x
= gen_rtx_TRUNCATE (HImode
, val
);
33995 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
33996 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
34009 struct expand_vec_perm_d dperm
;
34013 memset (&dperm
, 0, sizeof (dperm
));
34014 dperm
.target
= target
;
34015 dperm
.vmode
= mode
;
34016 dperm
.nelt
= GET_MODE_NUNITS (mode
);
34017 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
34018 dperm
.one_operand_p
= true;
34020 /* Extend to SImode using a paradoxical SUBREG. */
34021 tmp1
= gen_reg_rtx (SImode
);
34022 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
34024 /* Insert the SImode value as low element of a V4SImode vector. */
34025 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
34026 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
34028 ok
= (expand_vec_perm_1 (&dperm
)
34029 || expand_vec_perm_broadcast_1 (&dperm
));
34041 /* Replicate the value once into the next wider mode and recurse. */
34043 enum machine_mode smode
, wsmode
, wvmode
;
34046 smode
= GET_MODE_INNER (mode
);
34047 wvmode
= get_mode_wider_vector (mode
);
34048 wsmode
= GET_MODE_INNER (wvmode
);
34050 val
= convert_modes (wsmode
, smode
, val
, true);
34051 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
34052 GEN_INT (GET_MODE_BITSIZE (smode
)),
34053 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
34054 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
34056 x
= gen_lowpart (wvmode
, target
);
34057 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
34065 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
34066 rtx x
= gen_reg_rtx (hvmode
);
34068 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
34071 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
34072 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
34081 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34082 whose ONE_VAR element is VAR, and other elements are zero. Return true
34086 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
34087 rtx target
, rtx var
, int one_var
)
34089 enum machine_mode vsimode
;
34092 bool use_vector_set
= false;
34097 /* For SSE4.1, we normally use vector set. But if the second
34098 element is zero and inter-unit moves are OK, we use movq
34100 use_vector_set
= (TARGET_64BIT
34102 && !(TARGET_INTER_UNIT_MOVES
34108 use_vector_set
= TARGET_SSE4_1
;
34111 use_vector_set
= TARGET_SSE2
;
34114 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
34121 use_vector_set
= TARGET_AVX
;
34124 /* Use ix86_expand_vector_set in 64bit mode only. */
34125 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
34131 if (use_vector_set
)
34133 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
34134 var
= force_reg (GET_MODE_INNER (mode
), var
);
34135 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
34151 var
= force_reg (GET_MODE_INNER (mode
), var
);
34152 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
34153 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
34158 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
34159 new_target
= gen_reg_rtx (mode
);
34161 new_target
= target
;
34162 var
= force_reg (GET_MODE_INNER (mode
), var
);
34163 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
34164 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
34165 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
34168 /* We need to shuffle the value to the correct position, so
34169 create a new pseudo to store the intermediate result. */
34171 /* With SSE2, we can use the integer shuffle insns. */
34172 if (mode
!= V4SFmode
&& TARGET_SSE2
)
34174 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
34176 GEN_INT (one_var
== 1 ? 0 : 1),
34177 GEN_INT (one_var
== 2 ? 0 : 1),
34178 GEN_INT (one_var
== 3 ? 0 : 1)));
34179 if (target
!= new_target
)
34180 emit_move_insn (target
, new_target
);
34184 /* Otherwise convert the intermediate result to V4SFmode and
34185 use the SSE1 shuffle instructions. */
34186 if (mode
!= V4SFmode
)
34188 tmp
= gen_reg_rtx (V4SFmode
);
34189 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
34194 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
34196 GEN_INT (one_var
== 1 ? 0 : 1),
34197 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
34198 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
34200 if (mode
!= V4SFmode
)
34201 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
34202 else if (tmp
!= target
)
34203 emit_move_insn (target
, tmp
);
34205 else if (target
!= new_target
)
34206 emit_move_insn (target
, new_target
);
34211 vsimode
= V4SImode
;
34217 vsimode
= V2SImode
;
34223 /* Zero extend the variable element to SImode and recurse. */
34224 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
34226 x
= gen_reg_rtx (vsimode
);
34227 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
34229 gcc_unreachable ();
34231 emit_move_insn (target
, gen_lowpart (mode
, x
));
34239 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34240 consisting of the values in VALS. It is known that all elements
34241 except ONE_VAR are constants. Return true if successful. */
34244 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
34245 rtx target
, rtx vals
, int one_var
)
34247 rtx var
= XVECEXP (vals
, 0, one_var
);
34248 enum machine_mode wmode
;
34251 const_vec
= copy_rtx (vals
);
34252 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
34253 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
34261 /* For the two element vectors, it's just as easy to use
34262 the general case. */
34266 /* Use ix86_expand_vector_set in 64bit mode only. */
34289 /* There's no way to set one QImode entry easily. Combine
34290 the variable value with its adjacent constant value, and
34291 promote to an HImode set. */
34292 x
= XVECEXP (vals
, 0, one_var
^ 1);
34295 var
= convert_modes (HImode
, QImode
, var
, true);
34296 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
34297 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
34298 x
= GEN_INT (INTVAL (x
) & 0xff);
34302 var
= convert_modes (HImode
, QImode
, var
, true);
34303 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
34305 if (x
!= const0_rtx
)
34306 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
34307 1, OPTAB_LIB_WIDEN
);
34309 x
= gen_reg_rtx (wmode
);
34310 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
34311 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
34313 emit_move_insn (target
, gen_lowpart (mode
, x
));
34320 emit_move_insn (target
, const_vec
);
34321 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
34325 /* A subroutine of ix86_expand_vector_init_general. Use vector
34326 concatenate to handle the most general case: all values variable,
34327 and none identical. */
34330 ix86_expand_vector_init_concat (enum machine_mode mode
,
34331 rtx target
, rtx
*ops
, int n
)
34333 enum machine_mode cmode
, hmode
= VOIDmode
;
34334 rtx first
[8], second
[4];
34374 gcc_unreachable ();
34377 if (!register_operand (ops
[1], cmode
))
34378 ops
[1] = force_reg (cmode
, ops
[1]);
34379 if (!register_operand (ops
[0], cmode
))
34380 ops
[0] = force_reg (cmode
, ops
[0]);
34381 emit_insn (gen_rtx_SET (VOIDmode
, target
,
34382 gen_rtx_VEC_CONCAT (mode
, ops
[0],
34402 gcc_unreachable ();
34418 gcc_unreachable ();
34423 /* FIXME: We process inputs backward to help RA. PR 36222. */
34426 for (; i
> 0; i
-= 2, j
--)
34428 first
[j
] = gen_reg_rtx (cmode
);
34429 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
34430 ix86_expand_vector_init (false, first
[j
],
34431 gen_rtx_PARALLEL (cmode
, v
));
34437 gcc_assert (hmode
!= VOIDmode
);
34438 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
34440 second
[j
] = gen_reg_rtx (hmode
);
34441 ix86_expand_vector_init_concat (hmode
, second
[j
],
34445 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
34448 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
34452 gcc_unreachable ();
34456 /* A subroutine of ix86_expand_vector_init_general. Use vector
34457 interleave to handle the most general case: all values variable,
34458 and none identical. */
34461 ix86_expand_vector_init_interleave (enum machine_mode mode
,
34462 rtx target
, rtx
*ops
, int n
)
34464 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
34467 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
34468 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
34469 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
34474 gen_load_even
= gen_vec_setv8hi
;
34475 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
34476 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
34477 inner_mode
= HImode
;
34478 first_imode
= V4SImode
;
34479 second_imode
= V2DImode
;
34480 third_imode
= VOIDmode
;
34483 gen_load_even
= gen_vec_setv16qi
;
34484 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
34485 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
34486 inner_mode
= QImode
;
34487 first_imode
= V8HImode
;
34488 second_imode
= V4SImode
;
34489 third_imode
= V2DImode
;
34492 gcc_unreachable ();
34495 for (i
= 0; i
< n
; i
++)
34497 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
34498 op0
= gen_reg_rtx (SImode
);
34499 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
34501 /* Insert the SImode value as low element of V4SImode vector. */
34502 op1
= gen_reg_rtx (V4SImode
);
34503 op0
= gen_rtx_VEC_MERGE (V4SImode
,
34504 gen_rtx_VEC_DUPLICATE (V4SImode
,
34506 CONST0_RTX (V4SImode
),
34508 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
34510 /* Cast the V4SImode vector back to a vector in orignal mode. */
34511 op0
= gen_reg_rtx (mode
);
34512 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
34514 /* Load even elements into the second positon. */
34515 emit_insn (gen_load_even (op0
,
34516 force_reg (inner_mode
,
34520 /* Cast vector to FIRST_IMODE vector. */
34521 ops
[i
] = gen_reg_rtx (first_imode
);
34522 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
34525 /* Interleave low FIRST_IMODE vectors. */
34526 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
34528 op0
= gen_reg_rtx (first_imode
);
34529 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
34531 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
34532 ops
[j
] = gen_reg_rtx (second_imode
);
34533 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
34536 /* Interleave low SECOND_IMODE vectors. */
34537 switch (second_imode
)
34540 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
34542 op0
= gen_reg_rtx (second_imode
);
34543 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
34546 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
34548 ops
[j
] = gen_reg_rtx (third_imode
);
34549 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
34551 second_imode
= V2DImode
;
34552 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
34556 op0
= gen_reg_rtx (second_imode
);
34557 emit_insn (gen_interleave_second_low (op0
, ops
[0],
34560 /* Cast the SECOND_IMODE vector back to a vector on original
34562 emit_insn (gen_rtx_SET (VOIDmode
, target
,
34563 gen_lowpart (mode
, op0
)));
34567 gcc_unreachable ();
34571 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
34572 all values variable, and none identical. */
34575 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
34576 rtx target
, rtx vals
)
34578 rtx ops
[32], op0
, op1
;
34579 enum machine_mode half_mode
= VOIDmode
;
34586 if (!mmx_ok
&& !TARGET_SSE
)
34598 n
= GET_MODE_NUNITS (mode
);
34599 for (i
= 0; i
< n
; i
++)
34600 ops
[i
] = XVECEXP (vals
, 0, i
);
34601 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
34605 half_mode
= V16QImode
;
34609 half_mode
= V8HImode
;
34613 n
= GET_MODE_NUNITS (mode
);
34614 for (i
= 0; i
< n
; i
++)
34615 ops
[i
] = XVECEXP (vals
, 0, i
);
34616 op0
= gen_reg_rtx (half_mode
);
34617 op1
= gen_reg_rtx (half_mode
);
34618 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
34620 ix86_expand_vector_init_interleave (half_mode
, op1
,
34621 &ops
[n
>> 1], n
>> 2);
34622 emit_insn (gen_rtx_SET (VOIDmode
, target
,
34623 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
34627 if (!TARGET_SSE4_1
)
34635 /* Don't use ix86_expand_vector_init_interleave if we can't
34636 move from GPR to SSE register directly. */
34637 if (!TARGET_INTER_UNIT_MOVES
)
34640 n
= GET_MODE_NUNITS (mode
);
34641 for (i
= 0; i
< n
; i
++)
34642 ops
[i
] = XVECEXP (vals
, 0, i
);
34643 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
34651 gcc_unreachable ();
34655 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
34656 enum machine_mode inner_mode
;
34657 rtx words
[4], shift
;
34659 inner_mode
= GET_MODE_INNER (mode
);
34660 n_elts
= GET_MODE_NUNITS (mode
);
34661 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
34662 n_elt_per_word
= n_elts
/ n_words
;
34663 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
34665 for (i
= 0; i
< n_words
; ++i
)
34667 rtx word
= NULL_RTX
;
34669 for (j
= 0; j
< n_elt_per_word
; ++j
)
34671 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
34672 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
34678 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
34679 word
, 1, OPTAB_LIB_WIDEN
);
34680 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
34681 word
, 1, OPTAB_LIB_WIDEN
);
34689 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
34690 else if (n_words
== 2)
34692 rtx tmp
= gen_reg_rtx (mode
);
34693 emit_clobber (tmp
);
34694 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
34695 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
34696 emit_move_insn (target
, tmp
);
34698 else if (n_words
== 4)
34700 rtx tmp
= gen_reg_rtx (V4SImode
);
34701 gcc_assert (word_mode
== SImode
);
34702 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
34703 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
34704 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
34707 gcc_unreachable ();
34711 /* Initialize vector TARGET via VALS. Suppress the use of MMX
34712 instructions unless MMX_OK is true. */
34715 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
34717 enum machine_mode mode
= GET_MODE (target
);
34718 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34719 int n_elts
= GET_MODE_NUNITS (mode
);
34720 int n_var
= 0, one_var
= -1;
34721 bool all_same
= true, all_const_zero
= true;
34725 for (i
= 0; i
< n_elts
; ++i
)
34727 x
= XVECEXP (vals
, 0, i
);
34728 if (!(CONST_INT_P (x
)
34729 || GET_CODE (x
) == CONST_DOUBLE
34730 || GET_CODE (x
) == CONST_FIXED
))
34731 n_var
++, one_var
= i
;
34732 else if (x
!= CONST0_RTX (inner_mode
))
34733 all_const_zero
= false;
34734 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
34738 /* Constants are best loaded from the constant pool. */
34741 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
34745 /* If all values are identical, broadcast the value. */
34747 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
34748 XVECEXP (vals
, 0, 0)))
34751 /* Values where only one field is non-constant are best loaded from
34752 the pool and overwritten via move later. */
34756 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
34757 XVECEXP (vals
, 0, one_var
),
34761 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
34765 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
34769 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
34771 enum machine_mode mode
= GET_MODE (target
);
34772 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34773 enum machine_mode half_mode
;
34774 bool use_vec_merge
= false;
34776 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
34778 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
34779 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
34780 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
34781 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
34782 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
34783 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
34785 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
34787 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
34788 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
34789 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
34790 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
34791 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
34792 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
34802 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34803 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
34805 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34807 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34808 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34814 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
34818 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34819 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
34821 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34823 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34824 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34831 /* For the two element vectors, we implement a VEC_CONCAT with
34832 the extraction of the other element. */
34834 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
34835 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
34838 op0
= val
, op1
= tmp
;
34840 op0
= tmp
, op1
= val
;
34842 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
34843 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34848 use_vec_merge
= TARGET_SSE4_1
;
34855 use_vec_merge
= true;
34859 /* tmp = target = A B C D */
34860 tmp
= copy_to_reg (target
);
34861 /* target = A A B B */
34862 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
34863 /* target = X A B B */
34864 ix86_expand_vector_set (false, target
, val
, 0);
34865 /* target = A X C D */
34866 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34867 const1_rtx
, const0_rtx
,
34868 GEN_INT (2+4), GEN_INT (3+4)));
34872 /* tmp = target = A B C D */
34873 tmp
= copy_to_reg (target
);
34874 /* tmp = X B C D */
34875 ix86_expand_vector_set (false, tmp
, val
, 0);
34876 /* target = A B X D */
34877 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34878 const0_rtx
, const1_rtx
,
34879 GEN_INT (0+4), GEN_INT (3+4)));
34883 /* tmp = target = A B C D */
34884 tmp
= copy_to_reg (target
);
34885 /* tmp = X B C D */
34886 ix86_expand_vector_set (false, tmp
, val
, 0);
34887 /* target = A B X D */
34888 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34889 const0_rtx
, const1_rtx
,
34890 GEN_INT (2+4), GEN_INT (0+4)));
34894 gcc_unreachable ();
34899 use_vec_merge
= TARGET_SSE4_1
;
34903 /* Element 0 handled by vec_merge below. */
34906 use_vec_merge
= true;
34912 /* With SSE2, use integer shuffles to swap element 0 and ELT,
34913 store into element 0, then shuffle them back. */
34917 order
[0] = GEN_INT (elt
);
34918 order
[1] = const1_rtx
;
34919 order
[2] = const2_rtx
;
34920 order
[3] = GEN_INT (3);
34921 order
[elt
] = const0_rtx
;
34923 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34924 order
[1], order
[2], order
[3]));
34926 ix86_expand_vector_set (false, target
, val
, 0);
34928 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34929 order
[1], order
[2], order
[3]));
34933 /* For SSE1, we have to reuse the V4SF code. */
34934 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
34935 gen_lowpart (SFmode
, val
), elt
);
34940 use_vec_merge
= TARGET_SSE2
;
34943 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34947 use_vec_merge
= TARGET_SSE4_1
;
34954 half_mode
= V16QImode
;
34960 half_mode
= V8HImode
;
34966 half_mode
= V4SImode
;
34972 half_mode
= V2DImode
;
34978 half_mode
= V4SFmode
;
34984 half_mode
= V2DFmode
;
34990 /* Compute offset. */
34994 gcc_assert (i
<= 1);
34996 /* Extract the half. */
34997 tmp
= gen_reg_rtx (half_mode
);
34998 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
35000 /* Put val in tmp at elt. */
35001 ix86_expand_vector_set (false, tmp
, val
, elt
);
35004 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
35013 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35014 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
35015 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35019 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
35021 emit_move_insn (mem
, target
);
35023 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
35024 emit_move_insn (tmp
, val
);
35026 emit_move_insn (target
, mem
);
35031 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
35033 enum machine_mode mode
= GET_MODE (vec
);
35034 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
35035 bool use_vec_extr
= false;
35048 use_vec_extr
= true;
35052 use_vec_extr
= TARGET_SSE4_1
;
35064 tmp
= gen_reg_rtx (mode
);
35065 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
35066 GEN_INT (elt
), GEN_INT (elt
),
35067 GEN_INT (elt
+4), GEN_INT (elt
+4)));
35071 tmp
= gen_reg_rtx (mode
);
35072 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
35076 gcc_unreachable ();
35079 use_vec_extr
= true;
35084 use_vec_extr
= TARGET_SSE4_1
;
35098 tmp
= gen_reg_rtx (mode
);
35099 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
35100 GEN_INT (elt
), GEN_INT (elt
),
35101 GEN_INT (elt
), GEN_INT (elt
)));
35105 tmp
= gen_reg_rtx (mode
);
35106 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
35110 gcc_unreachable ();
35113 use_vec_extr
= true;
35118 /* For SSE1, we have to reuse the V4SF code. */
35119 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
35120 gen_lowpart (V4SFmode
, vec
), elt
);
35126 use_vec_extr
= TARGET_SSE2
;
35129 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
35133 use_vec_extr
= TARGET_SSE4_1
;
35139 tmp
= gen_reg_rtx (V4SFmode
);
35141 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
35143 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
35144 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
35152 tmp
= gen_reg_rtx (V2DFmode
);
35154 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
35156 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
35157 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
35165 tmp
= gen_reg_rtx (V16QImode
);
35167 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
35169 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
35170 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
35178 tmp
= gen_reg_rtx (V8HImode
);
35180 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
35182 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
35183 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
35191 tmp
= gen_reg_rtx (V4SImode
);
35193 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
35195 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
35196 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
35204 tmp
= gen_reg_rtx (V2DImode
);
35206 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
35208 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
35209 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
35215 /* ??? Could extract the appropriate HImode element and shift. */
35222 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
35223 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
35225 /* Let the rtl optimizers know about the zero extension performed. */
35226 if (inner_mode
== QImode
|| inner_mode
== HImode
)
35228 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
35229 target
= gen_lowpart (SImode
, target
);
35232 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35236 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
35238 emit_move_insn (mem
, vec
);
35240 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
35241 emit_move_insn (target
, tmp
);
35245 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
35246 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
35247 The upper bits of DEST are undefined, though they shouldn't cause
35248 exceptions (some bits from src or all zeros are ok). */
35251 emit_reduc_half (rtx dest
, rtx src
, int i
)
35254 switch (GET_MODE (src
))
35258 tem
= gen_sse_movhlps (dest
, src
, src
);
35260 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
35261 GEN_INT (1 + 4), GEN_INT (1 + 4));
35264 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
35270 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
35271 gen_lowpart (V1TImode
, src
),
35276 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
35278 tem
= gen_avx_shufps256 (dest
, src
, src
,
35279 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
35283 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
35285 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
35292 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
35293 gen_lowpart (V4DImode
, src
),
35294 gen_lowpart (V4DImode
, src
),
35297 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
35298 gen_lowpart (V2TImode
, src
),
35302 gcc_unreachable ();
35307 /* Expand a vector reduction. FN is the binary pattern to reduce;
35308 DEST is the destination; IN is the input vector. */
35311 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
35313 rtx half
, dst
, vec
= in
;
35314 enum machine_mode mode
= GET_MODE (in
);
35317 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
35319 && mode
== V8HImode
35320 && fn
== gen_uminv8hi3
)
35322 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
35326 for (i
= GET_MODE_BITSIZE (mode
);
35327 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
35330 half
= gen_reg_rtx (mode
);
35331 emit_reduc_half (half
, vec
, i
);
35332 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
35335 dst
= gen_reg_rtx (mode
);
35336 emit_insn (fn (dst
, half
, vec
));
35341 /* Target hook for scalar_mode_supported_p. */
35343 ix86_scalar_mode_supported_p (enum machine_mode mode
)
35345 if (DECIMAL_FLOAT_MODE_P (mode
))
35346 return default_decimal_float_supported_p ();
35347 else if (mode
== TFmode
)
35350 return default_scalar_mode_supported_p (mode
);
35353 /* Implements target hook vector_mode_supported_p. */
35355 ix86_vector_mode_supported_p (enum machine_mode mode
)
35357 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
35359 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
35361 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35363 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
35365 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
35370 /* Target hook for c_mode_for_suffix. */
35371 static enum machine_mode
35372 ix86_c_mode_for_suffix (char suffix
)
35382 /* Worker function for TARGET_MD_ASM_CLOBBERS.
35384 We do this in the new i386 backend to maintain source compatibility
35385 with the old cc0-based compiler. */
35388 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
35389 tree inputs ATTRIBUTE_UNUSED
,
35392 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
35394 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
35399 /* Implements target vector targetm.asm.encode_section_info. */
35401 static void ATTRIBUTE_UNUSED
35402 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
35404 default_encode_section_info (decl
, rtl
, first
);
35406 if (TREE_CODE (decl
) == VAR_DECL
35407 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
35408 && ix86_in_large_data_p (decl
))
35409 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
35412 /* Worker function for REVERSE_CONDITION. */
35415 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
35417 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
35418 ? reverse_condition (code
)
35419 : reverse_condition_maybe_unordered (code
));
35422 /* Output code to perform an x87 FP register move, from OPERANDS[1]
35426 output_387_reg_move (rtx insn
, rtx
*operands
)
35428 if (REG_P (operands
[0]))
35430 if (REG_P (operands
[1])
35431 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
35433 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
35434 return output_387_ffreep (operands
, 0);
35435 return "fstp\t%y0";
35437 if (STACK_TOP_P (operands
[0]))
35438 return "fld%Z1\t%y1";
35441 else if (MEM_P (operands
[0]))
35443 gcc_assert (REG_P (operands
[1]));
35444 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
35445 return "fstp%Z0\t%y0";
35448 /* There is no non-popping store to memory for XFmode.
35449 So if we need one, follow the store with a load. */
35450 if (GET_MODE (operands
[0]) == XFmode
)
35451 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
35453 return "fst%Z0\t%y0";
35460 /* Output code to perform a conditional jump to LABEL, if C2 flag in
35461 FP status register is set. */
35464 ix86_emit_fp_unordered_jump (rtx label
)
35466 rtx reg
= gen_reg_rtx (HImode
);
35469 emit_insn (gen_x86_fnstsw_1 (reg
));
35471 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
35473 emit_insn (gen_x86_sahf_1 (reg
));
35475 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
35476 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
35480 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
35482 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
35483 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
35486 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
35487 gen_rtx_LABEL_REF (VOIDmode
, label
),
35489 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
35491 emit_jump_insn (temp
);
35492 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
35495 /* Output code to perform a log1p XFmode calculation. */
35497 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
35499 rtx label1
= gen_label_rtx ();
35500 rtx label2
= gen_label_rtx ();
35502 rtx tmp
= gen_reg_rtx (XFmode
);
35503 rtx tmp2
= gen_reg_rtx (XFmode
);
35506 emit_insn (gen_absxf2 (tmp
, op1
));
35507 test
= gen_rtx_GE (VOIDmode
, tmp
,
35508 CONST_DOUBLE_FROM_REAL_VALUE (
35509 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
35511 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
35513 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
35514 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
35515 emit_jump (label2
);
35517 emit_label (label1
);
35518 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
35519 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
35520 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
35521 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
35523 emit_label (label2
);
35526 /* Emit code for round calculation. */
35527 void ix86_emit_i387_round (rtx op0
, rtx op1
)
35529 enum machine_mode inmode
= GET_MODE (op1
);
35530 enum machine_mode outmode
= GET_MODE (op0
);
35531 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
35532 rtx scratch
= gen_reg_rtx (HImode
);
35533 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
35534 rtx jump_label
= gen_label_rtx ();
35536 rtx (*gen_abs
) (rtx
, rtx
);
35537 rtx (*gen_neg
) (rtx
, rtx
);
35542 gen_abs
= gen_abssf2
;
35545 gen_abs
= gen_absdf2
;
35548 gen_abs
= gen_absxf2
;
35551 gcc_unreachable ();
35557 gen_neg
= gen_negsf2
;
35560 gen_neg
= gen_negdf2
;
35563 gen_neg
= gen_negxf2
;
35566 gen_neg
= gen_neghi2
;
35569 gen_neg
= gen_negsi2
;
35572 gen_neg
= gen_negdi2
;
35575 gcc_unreachable ();
35578 e1
= gen_reg_rtx (inmode
);
35579 e2
= gen_reg_rtx (inmode
);
35580 res
= gen_reg_rtx (outmode
);
35582 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
35584 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
35586 /* scratch = fxam(op1) */
35587 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
35588 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
35590 /* e1 = fabs(op1) */
35591 emit_insn (gen_abs (e1
, op1
));
35593 /* e2 = e1 + 0.5 */
35594 half
= force_reg (inmode
, half
);
35595 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35596 gen_rtx_PLUS (inmode
, e1
, half
)));
35598 /* res = floor(e2) */
35599 if (inmode
!= XFmode
)
35601 tmp1
= gen_reg_rtx (XFmode
);
35603 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
35604 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
35614 rtx tmp0
= gen_reg_rtx (XFmode
);
35616 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
35618 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35619 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
35620 UNSPEC_TRUNC_NOOP
)));
35624 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
35627 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
35630 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
35633 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
35636 gcc_unreachable ();
35639 /* flags = signbit(a) */
35640 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
35642 /* if (flags) then res = -res */
35643 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
35644 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
35645 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
35647 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35648 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
35649 JUMP_LABEL (insn
) = jump_label
;
35651 emit_insn (gen_neg (res
, res
));
35653 emit_label (jump_label
);
35654 LABEL_NUSES (jump_label
) = 1;
35656 emit_move_insn (op0
, res
);
35659 /* Output code to perform a Newton-Rhapson approximation of a single precision
35660 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
35662 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
35664 rtx x0
, x1
, e0
, e1
;
35666 x0
= gen_reg_rtx (mode
);
35667 e0
= gen_reg_rtx (mode
);
35668 e1
= gen_reg_rtx (mode
);
35669 x1
= gen_reg_rtx (mode
);
35671 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
35673 b
= force_reg (mode
, b
);
35675 /* x0 = rcp(b) estimate */
35676 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35677 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
35680 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35681 gen_rtx_MULT (mode
, x0
, b
)));
35684 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35685 gen_rtx_MULT (mode
, x0
, e0
)));
35688 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35689 gen_rtx_PLUS (mode
, x0
, x0
)));
35692 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
35693 gen_rtx_MINUS (mode
, e1
, e0
)));
35696 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35697 gen_rtx_MULT (mode
, a
, x1
)));
35700 /* Output code to perform a Newton-Rhapson approximation of a
35701 single precision floating point [reciprocal] square root. */
35703 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
35706 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
35709 x0
= gen_reg_rtx (mode
);
35710 e0
= gen_reg_rtx (mode
);
35711 e1
= gen_reg_rtx (mode
);
35712 e2
= gen_reg_rtx (mode
);
35713 e3
= gen_reg_rtx (mode
);
35715 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
35716 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35718 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
35719 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35721 if (VECTOR_MODE_P (mode
))
35723 mthree
= ix86_build_const_vector (mode
, true, mthree
);
35724 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
35727 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
35728 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
35730 a
= force_reg (mode
, a
);
35732 /* x0 = rsqrt(a) estimate */
35733 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35734 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
35737 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
35742 zero
= gen_reg_rtx (mode
);
35743 mask
= gen_reg_rtx (mode
);
35745 zero
= force_reg (mode
, CONST0_RTX(mode
));
35746 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
35747 gen_rtx_NE (mode
, zero
, a
)));
35749 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35750 gen_rtx_AND (mode
, x0
, mask
)));
35754 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35755 gen_rtx_MULT (mode
, x0
, a
)));
35757 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35758 gen_rtx_MULT (mode
, e0
, x0
)));
35761 mthree
= force_reg (mode
, mthree
);
35762 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35763 gen_rtx_PLUS (mode
, e1
, mthree
)));
35765 mhalf
= force_reg (mode
, mhalf
);
35767 /* e3 = -.5 * x0 */
35768 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35769 gen_rtx_MULT (mode
, x0
, mhalf
)));
35771 /* e3 = -.5 * e0 */
35772 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35773 gen_rtx_MULT (mode
, e0
, mhalf
)));
35774 /* ret = e2 * e3 */
35775 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35776 gen_rtx_MULT (mode
, e2
, e3
)));
35779 #ifdef TARGET_SOLARIS
35780 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
35783 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
35786 /* With Binutils 2.15, the "@unwind" marker must be specified on
35787 every occurrence of the ".eh_frame" section, not just the first
35790 && strcmp (name
, ".eh_frame") == 0)
35792 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
35793 flags
& SECTION_WRITE
? "aw" : "a");
35798 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
35800 solaris_elf_asm_comdat_section (name
, flags
, decl
);
35805 default_elf_asm_named_section (name
, flags
, decl
);
35807 #endif /* TARGET_SOLARIS */
35809 /* Return the mangling of TYPE if it is an extended fundamental type. */
35811 static const char *
35812 ix86_mangle_type (const_tree type
)
35814 type
= TYPE_MAIN_VARIANT (type
);
35816 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
35817 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
35820 switch (TYPE_MODE (type
))
35823 /* __float128 is "g". */
35826 /* "long double" or __float80 is "e". */
35833 /* For 32-bit code we can save PIC register setup by using
35834 __stack_chk_fail_local hidden function instead of calling
35835 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
35836 register, so it is better to call __stack_chk_fail directly. */
35838 static tree ATTRIBUTE_UNUSED
35839 ix86_stack_protect_fail (void)
35841 return TARGET_64BIT
35842 ? default_external_stack_protect_fail ()
35843 : default_hidden_stack_protect_fail ();
35846 /* Select a format to encode pointers in exception handling data. CODE
35847 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
35848 true if the symbol may be affected by dynamic relocations.
35850 ??? All x86 object file formats are capable of representing this.
35851 After all, the relocation needed is the same as for the call insn.
35852 Whether or not a particular assembler allows us to enter such, I
35853 guess we'll have to see. */
35855 asm_preferred_eh_data_format (int code
, int global
)
35859 int type
= DW_EH_PE_sdata8
;
35861 || ix86_cmodel
== CM_SMALL_PIC
35862 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
35863 type
= DW_EH_PE_sdata4
;
35864 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
35866 if (ix86_cmodel
== CM_SMALL
35867 || (ix86_cmodel
== CM_MEDIUM
&& code
))
35868 return DW_EH_PE_udata4
;
35869 return DW_EH_PE_absptr
;
35872 /* Expand copysign from SIGN to the positive value ABS_VALUE
35873 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
35876 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
35878 enum machine_mode mode
= GET_MODE (sign
);
35879 rtx sgn
= gen_reg_rtx (mode
);
35880 if (mask
== NULL_RTX
)
35882 enum machine_mode vmode
;
35884 if (mode
== SFmode
)
35886 else if (mode
== DFmode
)
35891 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
35892 if (!VECTOR_MODE_P (mode
))
35894 /* We need to generate a scalar mode mask in this case. */
35895 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35896 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35897 mask
= gen_reg_rtx (mode
);
35898 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35902 mask
= gen_rtx_NOT (mode
, mask
);
35903 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
35904 gen_rtx_AND (mode
, mask
, sign
)));
35905 emit_insn (gen_rtx_SET (VOIDmode
, result
,
35906 gen_rtx_IOR (mode
, abs_value
, sgn
)));
35909 /* Expand fabs (OP0) and return a new rtx that holds the result. The
35910 mask for masking out the sign-bit is stored in *SMASK, if that is
35913 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
35915 enum machine_mode vmode
, mode
= GET_MODE (op0
);
35918 xa
= gen_reg_rtx (mode
);
35919 if (mode
== SFmode
)
35921 else if (mode
== DFmode
)
35925 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
35926 if (!VECTOR_MODE_P (mode
))
35928 /* We need to generate a scalar mode mask in this case. */
35929 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35930 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35931 mask
= gen_reg_rtx (mode
);
35932 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35934 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
35935 gen_rtx_AND (mode
, op0
, mask
)));
35943 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
35944 swapping the operands if SWAP_OPERANDS is true. The expanded
35945 code is a forward jump to a newly created label in case the
35946 comparison is true. The generated label rtx is returned. */
35948 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
35949 bool swap_operands
)
35960 label
= gen_label_rtx ();
35961 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
35962 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35963 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
35964 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
35965 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
35966 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
35967 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35968 JUMP_LABEL (tmp
) = label
;
35973 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
35974 using comparison code CODE. Operands are swapped for the comparison if
35975 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
35977 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
35978 bool swap_operands
)
35980 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
35981 enum machine_mode mode
= GET_MODE (op0
);
35982 rtx mask
= gen_reg_rtx (mode
);
35991 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
35993 emit_insn (insn (mask
, op0
, op1
,
35994 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
35998 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35999 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
36001 ix86_gen_TWO52 (enum machine_mode mode
)
36003 REAL_VALUE_TYPE TWO52r
;
36006 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
36007 TWO52
= const_double_from_real_value (TWO52r
, mode
);
36008 TWO52
= force_reg (mode
, TWO52
);
36013 /* Expand SSE sequence for computing lround from OP1 storing
36016 ix86_expand_lround (rtx op0
, rtx op1
)
36018 /* C code for the stuff we're doing below:
36019 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
36022 enum machine_mode mode
= GET_MODE (op1
);
36023 const struct real_format
*fmt
;
36024 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
36027 /* load nextafter (0.5, 0.0) */
36028 fmt
= REAL_MODE_FORMAT (mode
);
36029 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
36030 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
36032 /* adj = copysign (0.5, op1) */
36033 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
36034 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
36036 /* adj = op1 + adj */
36037 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
36039 /* op0 = (imode)adj */
36040 expand_fix (op0
, adj
, 0);
36043 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
36046 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
36048 /* C code for the stuff we're doing below (for do_floor):
36050 xi -= (double)xi > op1 ? 1 : 0;
36053 enum machine_mode fmode
= GET_MODE (op1
);
36054 enum machine_mode imode
= GET_MODE (op0
);
36055 rtx ireg
, freg
, label
, tmp
;
36057 /* reg = (long)op1 */
36058 ireg
= gen_reg_rtx (imode
);
36059 expand_fix (ireg
, op1
, 0);
36061 /* freg = (double)reg */
36062 freg
= gen_reg_rtx (fmode
);
36063 expand_float (freg
, ireg
, 0);
36065 /* ireg = (freg > op1) ? ireg - 1 : ireg */
36066 label
= ix86_expand_sse_compare_and_jump (UNLE
,
36067 freg
, op1
, !do_floor
);
36068 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
36069 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
36070 emit_move_insn (ireg
, tmp
);
36072 emit_label (label
);
36073 LABEL_NUSES (label
) = 1;
36075 emit_move_insn (op0
, ireg
);
36078 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
36079 result in OPERAND0. */
36081 ix86_expand_rint (rtx operand0
, rtx operand1
)
36083 /* C code for the stuff we're doing below:
36084 xa = fabs (operand1);
36085 if (!isless (xa, 2**52))
36087 xa = xa + 2**52 - 2**52;
36088 return copysign (xa, operand1);
36090 enum machine_mode mode
= GET_MODE (operand0
);
36091 rtx res
, xa
, label
, TWO52
, mask
;
36093 res
= gen_reg_rtx (mode
);
36094 emit_move_insn (res
, operand1
);
36096 /* xa = abs (operand1) */
36097 xa
= ix86_expand_sse_fabs (res
, &mask
);
36099 /* if (!isless (xa, TWO52)) goto label; */
36100 TWO52
= ix86_gen_TWO52 (mode
);
36101 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36103 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36104 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
36106 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
36108 emit_label (label
);
36109 LABEL_NUSES (label
) = 1;
36111 emit_move_insn (operand0
, res
);
36114 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
36117 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
36119 /* C code for the stuff we expand below.
36120 double xa = fabs (x), x2;
36121 if (!isless (xa, TWO52))
36123 xa = xa + TWO52 - TWO52;
36124 x2 = copysign (xa, x);
36133 enum machine_mode mode
= GET_MODE (operand0
);
36134 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
36136 TWO52
= ix86_gen_TWO52 (mode
);
36138 /* Temporary for holding the result, initialized to the input
36139 operand to ease control flow. */
36140 res
= gen_reg_rtx (mode
);
36141 emit_move_insn (res
, operand1
);
36143 /* xa = abs (operand1) */
36144 xa
= ix86_expand_sse_fabs (res
, &mask
);
36146 /* if (!isless (xa, TWO52)) goto label; */
36147 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36149 /* xa = xa + TWO52 - TWO52; */
36150 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36151 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
36153 /* xa = copysign (xa, operand1) */
36154 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
36156 /* generate 1.0 or -1.0 */
36157 one
= force_reg (mode
,
36158 const_double_from_real_value (do_floor
36159 ? dconst1
: dconstm1
, mode
));
36161 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36162 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
36163 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36164 gen_rtx_AND (mode
, one
, tmp
)));
36165 /* We always need to subtract here to preserve signed zero. */
36166 tmp
= expand_simple_binop (mode
, MINUS
,
36167 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36168 emit_move_insn (res
, tmp
);
36170 emit_label (label
);
36171 LABEL_NUSES (label
) = 1;
36173 emit_move_insn (operand0
, res
);
36176 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
36179 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
36181 /* C code for the stuff we expand below.
36182 double xa = fabs (x), x2;
36183 if (!isless (xa, TWO52))
36185 x2 = (double)(long)x;
36192 if (HONOR_SIGNED_ZEROS (mode))
36193 return copysign (x2, x);
36196 enum machine_mode mode
= GET_MODE (operand0
);
36197 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
36199 TWO52
= ix86_gen_TWO52 (mode
);
36201 /* Temporary for holding the result, initialized to the input
36202 operand to ease control flow. */
36203 res
= gen_reg_rtx (mode
);
36204 emit_move_insn (res
, operand1
);
36206 /* xa = abs (operand1) */
36207 xa
= ix86_expand_sse_fabs (res
, &mask
);
36209 /* if (!isless (xa, TWO52)) goto label; */
36210 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36212 /* xa = (double)(long)x */
36213 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36214 expand_fix (xi
, res
, 0);
36215 expand_float (xa
, xi
, 0);
36218 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
36220 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36221 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
36222 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36223 gen_rtx_AND (mode
, one
, tmp
)));
36224 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
36225 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36226 emit_move_insn (res
, tmp
);
36228 if (HONOR_SIGNED_ZEROS (mode
))
36229 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
36231 emit_label (label
);
36232 LABEL_NUSES (label
) = 1;
36234 emit_move_insn (operand0
, res
);
36237 /* Expand SSE sequence for computing round from OPERAND1 storing
36238 into OPERAND0. Sequence that works without relying on DImode truncation
36239 via cvttsd2siq that is only available on 64bit targets. */
36241 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
36243 /* C code for the stuff we expand below.
36244 double xa = fabs (x), xa2, x2;
36245 if (!isless (xa, TWO52))
36247 Using the absolute value and copying back sign makes
36248 -0.0 -> -0.0 correct.
36249 xa2 = xa + TWO52 - TWO52;
36254 else if (dxa > 0.5)
36256 x2 = copysign (xa2, x);
36259 enum machine_mode mode
= GET_MODE (operand0
);
36260 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
36262 TWO52
= ix86_gen_TWO52 (mode
);
36264 /* Temporary for holding the result, initialized to the input
36265 operand to ease control flow. */
36266 res
= gen_reg_rtx (mode
);
36267 emit_move_insn (res
, operand1
);
36269 /* xa = abs (operand1) */
36270 xa
= ix86_expand_sse_fabs (res
, &mask
);
36272 /* if (!isless (xa, TWO52)) goto label; */
36273 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36275 /* xa2 = xa + TWO52 - TWO52; */
36276 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36277 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
36279 /* dxa = xa2 - xa; */
36280 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
36282 /* generate 0.5, 1.0 and -0.5 */
36283 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
36284 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
36285 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
36289 tmp
= gen_reg_rtx (mode
);
36290 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
36291 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
36292 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36293 gen_rtx_AND (mode
, one
, tmp
)));
36294 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36295 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
36296 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
36297 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36298 gen_rtx_AND (mode
, one
, tmp
)));
36299 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36301 /* res = copysign (xa2, operand1) */
36302 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
36304 emit_label (label
);
36305 LABEL_NUSES (label
) = 1;
36307 emit_move_insn (operand0
, res
);
36310 /* Expand SSE sequence for computing trunc from OPERAND1 storing
36313 ix86_expand_trunc (rtx operand0
, rtx operand1
)
36315 /* C code for SSE variant we expand below.
36316 double xa = fabs (x), x2;
36317 if (!isless (xa, TWO52))
36319 x2 = (double)(long)x;
36320 if (HONOR_SIGNED_ZEROS (mode))
36321 return copysign (x2, x);
36324 enum machine_mode mode
= GET_MODE (operand0
);
36325 rtx xa
, xi
, TWO52
, label
, res
, mask
;
36327 TWO52
= ix86_gen_TWO52 (mode
);
36329 /* Temporary for holding the result, initialized to the input
36330 operand to ease control flow. */
36331 res
= gen_reg_rtx (mode
);
36332 emit_move_insn (res
, operand1
);
36334 /* xa = abs (operand1) */
36335 xa
= ix86_expand_sse_fabs (res
, &mask
);
36337 /* if (!isless (xa, TWO52)) goto label; */
36338 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36340 /* x = (double)(long)x */
36341 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36342 expand_fix (xi
, res
, 0);
36343 expand_float (res
, xi
, 0);
36345 if (HONOR_SIGNED_ZEROS (mode
))
36346 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
36348 emit_label (label
);
36349 LABEL_NUSES (label
) = 1;
36351 emit_move_insn (operand0
, res
);
36354 /* Expand SSE sequence for computing trunc from OPERAND1 storing
36357 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
36359 enum machine_mode mode
= GET_MODE (operand0
);
36360 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
36362 /* C code for SSE variant we expand below.
36363 double xa = fabs (x), x2;
36364 if (!isless (xa, TWO52))
36366 xa2 = xa + TWO52 - TWO52;
36370 x2 = copysign (xa2, x);
36374 TWO52
= ix86_gen_TWO52 (mode
);
36376 /* Temporary for holding the result, initialized to the input
36377 operand to ease control flow. */
36378 res
= gen_reg_rtx (mode
);
36379 emit_move_insn (res
, operand1
);
36381 /* xa = abs (operand1) */
36382 xa
= ix86_expand_sse_fabs (res
, &smask
);
36384 /* if (!isless (xa, TWO52)) goto label; */
36385 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36387 /* res = xa + TWO52 - TWO52; */
36388 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36389 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
36390 emit_move_insn (res
, tmp
);
36393 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
36395 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
36396 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
36397 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
36398 gen_rtx_AND (mode
, mask
, one
)));
36399 tmp
= expand_simple_binop (mode
, MINUS
,
36400 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
36401 emit_move_insn (res
, tmp
);
36403 /* res = copysign (res, operand1) */
36404 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
36406 emit_label (label
);
36407 LABEL_NUSES (label
) = 1;
36409 emit_move_insn (operand0
, res
);
36412 /* Expand SSE sequence for computing round from OPERAND1 storing
36415 ix86_expand_round (rtx operand0
, rtx operand1
)
36417 /* C code for the stuff we're doing below:
36418 double xa = fabs (x);
36419 if (!isless (xa, TWO52))
36421 xa = (double)(long)(xa + nextafter (0.5, 0.0));
36422 return copysign (xa, x);
36424 enum machine_mode mode
= GET_MODE (operand0
);
36425 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
36426 const struct real_format
*fmt
;
36427 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
36429 /* Temporary for holding the result, initialized to the input
36430 operand to ease control flow. */
36431 res
= gen_reg_rtx (mode
);
36432 emit_move_insn (res
, operand1
);
36434 TWO52
= ix86_gen_TWO52 (mode
);
36435 xa
= ix86_expand_sse_fabs (res
, &mask
);
36436 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36438 /* load nextafter (0.5, 0.0) */
36439 fmt
= REAL_MODE_FORMAT (mode
);
36440 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
36441 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
36443 /* xa = xa + 0.5 */
36444 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
36445 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
36447 /* xa = (double)(int64_t)xa */
36448 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36449 expand_fix (xi
, xa
, 0);
36450 expand_float (xa
, xi
, 0);
36452 /* res = copysign (xa, operand1) */
36453 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
36455 emit_label (label
);
36456 LABEL_NUSES (label
) = 1;
36458 emit_move_insn (operand0
, res
);
36461 /* Expand SSE sequence for computing round
36462 from OP1 storing into OP0 using sse4 round insn. */
36464 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
36466 enum machine_mode mode
= GET_MODE (op0
);
36467 rtx e1
, e2
, res
, half
;
36468 const struct real_format
*fmt
;
36469 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
36470 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
36471 rtx (*gen_round
) (rtx
, rtx
, rtx
);
36476 gen_copysign
= gen_copysignsf3
;
36477 gen_round
= gen_sse4_1_roundsf2
;
36480 gen_copysign
= gen_copysigndf3
;
36481 gen_round
= gen_sse4_1_rounddf2
;
36484 gcc_unreachable ();
36487 /* round (a) = trunc (a + copysign (0.5, a)) */
36489 /* load nextafter (0.5, 0.0) */
36490 fmt
= REAL_MODE_FORMAT (mode
);
36491 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
36492 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
36493 half
= const_double_from_real_value (pred_half
, mode
);
36495 /* e1 = copysign (0.5, op1) */
36496 e1
= gen_reg_rtx (mode
);
36497 emit_insn (gen_copysign (e1
, half
, op1
));
36499 /* e2 = op1 + e1 */
36500 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
36502 /* res = trunc (e2) */
36503 res
= gen_reg_rtx (mode
);
36504 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
36506 emit_move_insn (op0
, res
);
36510 /* Table of valid machine attributes. */
36511 static const struct attribute_spec ix86_attribute_table
[] =
36513 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
36514 affects_type_identity } */
36515 /* Stdcall attribute says callee is responsible for popping arguments
36516 if they are not variable. */
36517 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36519 /* Fastcall attribute says callee is responsible for popping arguments
36520 if they are not variable. */
36521 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36523 /* Thiscall attribute says callee is responsible for popping arguments
36524 if they are not variable. */
36525 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36527 /* Cdecl attribute says the callee is a normal C declaration */
36528 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36530 /* Regparm attribute specifies how many integer arguments are to be
36531 passed in registers. */
36532 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
36534 /* Sseregparm attribute says we are using x86_64 calling conventions
36535 for FP arguments. */
36536 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36538 /* The transactional memory builtins are implicitly regparm or fastcall
36539 depending on the ABI. Override the generic do-nothing attribute that
36540 these builtins were declared with. */
36541 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
36543 /* force_align_arg_pointer says this function realigns the stack at entry. */
36544 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
36545 false, true, true, ix86_handle_cconv_attribute
, false },
36546 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36547 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
36548 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
36549 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
36552 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
36554 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
36556 #ifdef SUBTARGET_ATTRIBUTE_TABLE
36557 SUBTARGET_ATTRIBUTE_TABLE
,
36559 /* ms_abi and sysv_abi calling convention function attributes. */
36560 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
36561 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
36562 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
36564 { "callee_pop_aggregate_return", 1, 1, false, true, true,
36565 ix86_handle_callee_pop_aggregate_return
, true },
36567 { NULL
, 0, 0, false, false, false, NULL
, false }
36570 /* Implement targetm.vectorize.builtin_vectorization_cost. */
36572 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
36574 int misalign ATTRIBUTE_UNUSED
)
36578 switch (type_of_cost
)
36581 return ix86_cost
->scalar_stmt_cost
;
36584 return ix86_cost
->scalar_load_cost
;
36587 return ix86_cost
->scalar_store_cost
;
36590 return ix86_cost
->vec_stmt_cost
;
36593 return ix86_cost
->vec_align_load_cost
;
36596 return ix86_cost
->vec_store_cost
;
36598 case vec_to_scalar
:
36599 return ix86_cost
->vec_to_scalar_cost
;
36601 case scalar_to_vec
:
36602 return ix86_cost
->scalar_to_vec_cost
;
36604 case unaligned_load
:
36605 case unaligned_store
:
36606 return ix86_cost
->vec_unalign_load_cost
;
36608 case cond_branch_taken
:
36609 return ix86_cost
->cond_taken_branch_cost
;
36611 case cond_branch_not_taken
:
36612 return ix86_cost
->cond_not_taken_branch_cost
;
36615 case vec_promote_demote
:
36616 return ix86_cost
->vec_stmt_cost
;
36618 case vec_construct
:
36619 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
36620 return elements
/ 2 + 1;
36623 gcc_unreachable ();
36627 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
36628 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
36629 insn every time. */
36631 static GTY(()) rtx vselect_insn
;
36633 /* Initialize vselect_insn. */
36636 init_vselect_insn (void)
36641 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
36642 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
36643 XVECEXP (x
, 0, i
) = const0_rtx
;
36644 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
36646 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
36648 vselect_insn
= emit_insn (x
);
36652 /* Construct (set target (vec_select op0 (parallel perm))) and
36653 return true if that's a valid instruction in the active ISA. */
36656 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
36657 unsigned nelt
, bool testing_p
)
36660 rtx x
, save_vconcat
;
36663 if (vselect_insn
== NULL_RTX
)
36664 init_vselect_insn ();
36666 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
36667 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
36668 for (i
= 0; i
< nelt
; ++i
)
36669 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
36670 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
36671 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
36672 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
36673 SET_DEST (PATTERN (vselect_insn
)) = target
;
36674 icode
= recog_memoized (vselect_insn
);
36676 if (icode
>= 0 && !testing_p
)
36677 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
36679 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
36680 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
36681 INSN_CODE (vselect_insn
) = -1;
36686 /* Similar, but generate a vec_concat from op0 and op1 as well. */
36689 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
36690 const unsigned char *perm
, unsigned nelt
,
36693 enum machine_mode v2mode
;
36697 if (vselect_insn
== NULL_RTX
)
36698 init_vselect_insn ();
36700 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
36701 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
36702 PUT_MODE (x
, v2mode
);
36705 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
36706 XEXP (x
, 0) = const0_rtx
;
36707 XEXP (x
, 1) = const0_rtx
;
36711 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36712 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
36715 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
36717 enum machine_mode vmode
= d
->vmode
;
36718 unsigned i
, mask
, nelt
= d
->nelt
;
36719 rtx target
, op0
, op1
, x
;
36720 rtx rperm
[32], vperm
;
36722 if (d
->one_operand_p
)
36724 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
36726 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
36728 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
36733 /* This is a blend, not a permute. Elements must stay in their
36734 respective lanes. */
36735 for (i
= 0; i
< nelt
; ++i
)
36737 unsigned e
= d
->perm
[i
];
36738 if (!(e
== i
|| e
== i
+ nelt
))
36745 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
36746 decision should be extracted elsewhere, so that we only try that
36747 sequence once all budget==3 options have been tried. */
36748 target
= d
->target
;
36761 for (i
= 0; i
< nelt
; ++i
)
36762 mask
|= (d
->perm
[i
] >= nelt
) << i
;
36766 for (i
= 0; i
< 2; ++i
)
36767 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
36772 for (i
= 0; i
< 4; ++i
)
36773 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36778 /* See if bytes move in pairs so we can use pblendw with
36779 an immediate argument, rather than pblendvb with a vector
36781 for (i
= 0; i
< 16; i
+= 2)
36782 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36785 for (i
= 0; i
< nelt
; ++i
)
36786 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
36789 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
36790 vperm
= force_reg (vmode
, vperm
);
36792 if (GET_MODE_SIZE (vmode
) == 16)
36793 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
36795 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
36799 for (i
= 0; i
< 8; ++i
)
36800 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36805 target
= gen_lowpart (vmode
, target
);
36806 op0
= gen_lowpart (vmode
, op0
);
36807 op1
= gen_lowpart (vmode
, op1
);
36811 /* See if bytes move in pairs. If not, vpblendvb must be used. */
36812 for (i
= 0; i
< 32; i
+= 2)
36813 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36815 /* See if bytes move in quadruplets. If yes, vpblendd
36816 with immediate can be used. */
36817 for (i
= 0; i
< 32; i
+= 4)
36818 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
36822 /* See if bytes move the same in both lanes. If yes,
36823 vpblendw with immediate can be used. */
36824 for (i
= 0; i
< 16; i
+= 2)
36825 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
36828 /* Use vpblendw. */
36829 for (i
= 0; i
< 16; ++i
)
36830 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
36835 /* Use vpblendd. */
36836 for (i
= 0; i
< 8; ++i
)
36837 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
36842 /* See if words move in pairs. If yes, vpblendd can be used. */
36843 for (i
= 0; i
< 16; i
+= 2)
36844 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36848 /* See if words move the same in both lanes. If not,
36849 vpblendvb must be used. */
36850 for (i
= 0; i
< 8; i
++)
36851 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
36853 /* Use vpblendvb. */
36854 for (i
= 0; i
< 32; ++i
)
36855 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
36859 target
= gen_lowpart (vmode
, target
);
36860 op0
= gen_lowpart (vmode
, op0
);
36861 op1
= gen_lowpart (vmode
, op1
);
36862 goto finish_pblendvb
;
36865 /* Use vpblendw. */
36866 for (i
= 0; i
< 16; ++i
)
36867 mask
|= (d
->perm
[i
] >= 16) << i
;
36871 /* Use vpblendd. */
36872 for (i
= 0; i
< 8; ++i
)
36873 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36878 /* Use vpblendd. */
36879 for (i
= 0; i
< 4; ++i
)
36880 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36885 gcc_unreachable ();
36888 /* This matches five different patterns with the different modes. */
36889 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
36890 x
= gen_rtx_SET (VOIDmode
, target
, x
);
36896 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36897 in terms of the variable form of vpermilps.
36899 Note that we will have already failed the immediate input vpermilps,
36900 which requires that the high and low part shuffle be identical; the
36901 variable form doesn't require that. */
36904 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
36906 rtx rperm
[8], vperm
;
36909 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
36912 /* We can only permute within the 128-bit lane. */
36913 for (i
= 0; i
< 8; ++i
)
36915 unsigned e
= d
->perm
[i
];
36916 if (i
< 4 ? e
>= 4 : e
< 4)
36923 for (i
= 0; i
< 8; ++i
)
36925 unsigned e
= d
->perm
[i
];
36927 /* Within each 128-bit lane, the elements of op0 are numbered
36928 from 0 and the elements of op1 are numbered from 4. */
36934 rperm
[i
] = GEN_INT (e
);
36937 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
36938 vperm
= force_reg (V8SImode
, vperm
);
36939 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
36944 /* Return true if permutation D can be performed as VMODE permutation
36948 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
36950 unsigned int i
, j
, chunk
;
36952 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
36953 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
36954 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
36957 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
36960 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
36961 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
36962 if (d
->perm
[i
] & (chunk
- 1))
36965 for (j
= 1; j
< chunk
; ++j
)
36966 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
36972 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36973 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
36976 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
36978 unsigned i
, nelt
, eltsz
, mask
;
36979 unsigned char perm
[32];
36980 enum machine_mode vmode
= V16QImode
;
36981 rtx rperm
[32], vperm
, target
, op0
, op1
;
36985 if (!d
->one_operand_p
)
36987 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
36990 && valid_perm_using_mode_p (V2TImode
, d
))
36995 /* Use vperm2i128 insn. The pattern uses
36996 V4DImode instead of V2TImode. */
36997 target
= gen_lowpart (V4DImode
, d
->target
);
36998 op0
= gen_lowpart (V4DImode
, d
->op0
);
36999 op1
= gen_lowpart (V4DImode
, d
->op1
);
37001 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
37002 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
37003 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
37011 if (GET_MODE_SIZE (d
->vmode
) == 16)
37016 else if (GET_MODE_SIZE (d
->vmode
) == 32)
37021 /* V4DImode should be already handled through
37022 expand_vselect by vpermq instruction. */
37023 gcc_assert (d
->vmode
!= V4DImode
);
37026 if (d
->vmode
== V8SImode
37027 || d
->vmode
== V16HImode
37028 || d
->vmode
== V32QImode
)
37030 /* First see if vpermq can be used for
37031 V8SImode/V16HImode/V32QImode. */
37032 if (valid_perm_using_mode_p (V4DImode
, d
))
37034 for (i
= 0; i
< 4; i
++)
37035 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
37038 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
37039 gen_lowpart (V4DImode
, d
->op0
),
37043 /* Next see if vpermd can be used. */
37044 if (valid_perm_using_mode_p (V8SImode
, d
))
37047 /* Or if vpermps can be used. */
37048 else if (d
->vmode
== V8SFmode
)
37051 if (vmode
== V32QImode
)
37053 /* vpshufb only works intra lanes, it is not
37054 possible to shuffle bytes in between the lanes. */
37055 for (i
= 0; i
< nelt
; ++i
)
37056 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
37067 if (vmode
== V8SImode
)
37068 for (i
= 0; i
< 8; ++i
)
37069 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
37072 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37073 if (!d
->one_operand_p
)
37074 mask
= 2 * nelt
- 1;
37075 else if (vmode
== V16QImode
)
37078 mask
= nelt
/ 2 - 1;
37080 for (i
= 0; i
< nelt
; ++i
)
37082 unsigned j
, e
= d
->perm
[i
] & mask
;
37083 for (j
= 0; j
< eltsz
; ++j
)
37084 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
37088 vperm
= gen_rtx_CONST_VECTOR (vmode
,
37089 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
37090 vperm
= force_reg (vmode
, vperm
);
37092 target
= gen_lowpart (vmode
, d
->target
);
37093 op0
= gen_lowpart (vmode
, d
->op0
);
37094 if (d
->one_operand_p
)
37096 if (vmode
== V16QImode
)
37097 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
37098 else if (vmode
== V32QImode
)
37099 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
37100 else if (vmode
== V8SFmode
)
37101 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
37103 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
37107 op1
= gen_lowpart (vmode
, d
->op1
);
37108 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
37114 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
37115 in a single instruction. */
37118 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
37120 unsigned i
, nelt
= d
->nelt
;
37121 unsigned char perm2
[MAX_VECT_LEN
];
37123 /* Check plain VEC_SELECT first, because AVX has instructions that could
37124 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
37125 input where SEL+CONCAT may not. */
37126 if (d
->one_operand_p
)
37128 int mask
= nelt
- 1;
37129 bool identity_perm
= true;
37130 bool broadcast_perm
= true;
37132 for (i
= 0; i
< nelt
; i
++)
37134 perm2
[i
] = d
->perm
[i
] & mask
;
37136 identity_perm
= false;
37138 broadcast_perm
= false;
37144 emit_move_insn (d
->target
, d
->op0
);
37147 else if (broadcast_perm
&& TARGET_AVX2
)
37149 /* Use vpbroadcast{b,w,d}. */
37150 rtx (*gen
) (rtx
, rtx
) = NULL
;
37154 gen
= gen_avx2_pbroadcastv32qi_1
;
37157 gen
= gen_avx2_pbroadcastv16hi_1
;
37160 gen
= gen_avx2_pbroadcastv8si_1
;
37163 gen
= gen_avx2_pbroadcastv16qi
;
37166 gen
= gen_avx2_pbroadcastv8hi
;
37169 gen
= gen_avx2_vec_dupv8sf_1
;
37171 /* For other modes prefer other shuffles this function creates. */
37177 emit_insn (gen (d
->target
, d
->op0
));
37182 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
37185 /* There are plenty of patterns in sse.md that are written for
37186 SEL+CONCAT and are not replicated for a single op. Perhaps
37187 that should be changed, to avoid the nastiness here. */
37189 /* Recognize interleave style patterns, which means incrementing
37190 every other permutation operand. */
37191 for (i
= 0; i
< nelt
; i
+= 2)
37193 perm2
[i
] = d
->perm
[i
] & mask
;
37194 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
37196 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
37200 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
37203 for (i
= 0; i
< nelt
; i
+= 4)
37205 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
37206 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
37207 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
37208 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
37211 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
37217 /* Finally, try the fully general two operand permute. */
37218 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
37222 /* Recognize interleave style patterns with reversed operands. */
37223 if (!d
->one_operand_p
)
37225 for (i
= 0; i
< nelt
; ++i
)
37227 unsigned e
= d
->perm
[i
];
37235 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
37240 /* Try the SSE4.1 blend variable merge instructions. */
37241 if (expand_vec_perm_blend (d
))
37244 /* Try one of the AVX vpermil variable permutations. */
37245 if (expand_vec_perm_vpermil (d
))
37248 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
37249 vpshufb, vpermd, vpermps or vpermq variable permutation. */
37250 if (expand_vec_perm_pshufb (d
))
37256 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37257 in terms of a pair of pshuflw + pshufhw instructions. */
37260 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
37262 unsigned char perm2
[MAX_VECT_LEN
];
37266 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
37269 /* The two permutations only operate in 64-bit lanes. */
37270 for (i
= 0; i
< 4; ++i
)
37271 if (d
->perm
[i
] >= 4)
37273 for (i
= 4; i
< 8; ++i
)
37274 if (d
->perm
[i
] < 4)
37280 /* Emit the pshuflw. */
37281 memcpy (perm2
, d
->perm
, 4);
37282 for (i
= 4; i
< 8; ++i
)
37284 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
37287 /* Emit the pshufhw. */
37288 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
37289 for (i
= 0; i
< 4; ++i
)
37291 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
37297 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37298 the permutation using the SSSE3 palignr instruction. This succeeds
37299 when all of the elements in PERM fit within one vector and we merely
37300 need to shift them down so that a single vector permutation has a
37301 chance to succeed. */
37304 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
37306 unsigned i
, nelt
= d
->nelt
;
37311 /* Even with AVX, palignr only operates on 128-bit vectors. */
37312 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
37315 min
= nelt
, max
= 0;
37316 for (i
= 0; i
< nelt
; ++i
)
37318 unsigned e
= d
->perm
[i
];
37324 if (min
== 0 || max
- min
>= nelt
)
37327 /* Given that we have SSSE3, we know we'll be able to implement the
37328 single operand permutation after the palignr with pshufb. */
37332 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
37333 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
37334 gen_lowpart (TImode
, d
->op1
),
37335 gen_lowpart (TImode
, d
->op0
), shift
));
37337 d
->op0
= d
->op1
= d
->target
;
37338 d
->one_operand_p
= true;
37341 for (i
= 0; i
< nelt
; ++i
)
37343 unsigned e
= d
->perm
[i
] - min
;
37349 /* Test for the degenerate case where the alignment by itself
37350 produces the desired permutation. */
37354 ok
= expand_vec_perm_1 (d
);
37360 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
37362 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37363 a two vector permutation into a single vector permutation by using
37364 an interleave operation to merge the vectors. */
37367 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
37369 struct expand_vec_perm_d dremap
, dfinal
;
37370 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
37371 unsigned HOST_WIDE_INT contents
;
37372 unsigned char remap
[2 * MAX_VECT_LEN
];
37374 bool ok
, same_halves
= false;
37376 if (GET_MODE_SIZE (d
->vmode
) == 16)
37378 if (d
->one_operand_p
)
37381 else if (GET_MODE_SIZE (d
->vmode
) == 32)
37385 /* For 32-byte modes allow even d->one_operand_p.
37386 The lack of cross-lane shuffling in some instructions
37387 might prevent a single insn shuffle. */
37389 dfinal
.testing_p
= true;
37390 /* If expand_vec_perm_interleave3 can expand this into
37391 a 3 insn sequence, give up and let it be expanded as
37392 3 insn sequence. While that is one insn longer,
37393 it doesn't need a memory operand and in the common
37394 case that both interleave low and high permutations
37395 with the same operands are adjacent needs 4 insns
37396 for both after CSE. */
37397 if (expand_vec_perm_interleave3 (&dfinal
))
37403 /* Examine from whence the elements come. */
37405 for (i
= 0; i
< nelt
; ++i
)
37406 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
37408 memset (remap
, 0xff, sizeof (remap
));
37411 if (GET_MODE_SIZE (d
->vmode
) == 16)
37413 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
37415 /* Split the two input vectors into 4 halves. */
37416 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
37421 /* If the elements from the low halves use interleave low, and similarly
37422 for interleave high. If the elements are from mis-matched halves, we
37423 can use shufps for V4SF/V4SI or do a DImode shuffle. */
37424 if ((contents
& (h1
| h3
)) == contents
)
37427 for (i
= 0; i
< nelt2
; ++i
)
37430 remap
[i
+ nelt
] = i
* 2 + 1;
37431 dremap
.perm
[i
* 2] = i
;
37432 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
37434 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
37435 dremap
.vmode
= V4SFmode
;
37437 else if ((contents
& (h2
| h4
)) == contents
)
37440 for (i
= 0; i
< nelt2
; ++i
)
37442 remap
[i
+ nelt2
] = i
* 2;
37443 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
37444 dremap
.perm
[i
* 2] = i
+ nelt2
;
37445 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
37447 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
37448 dremap
.vmode
= V4SFmode
;
37450 else if ((contents
& (h1
| h4
)) == contents
)
37453 for (i
= 0; i
< nelt2
; ++i
)
37456 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
37457 dremap
.perm
[i
] = i
;
37458 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
37463 dremap
.vmode
= V2DImode
;
37465 dremap
.perm
[0] = 0;
37466 dremap
.perm
[1] = 3;
37469 else if ((contents
& (h2
| h3
)) == contents
)
37472 for (i
= 0; i
< nelt2
; ++i
)
37474 remap
[i
+ nelt2
] = i
;
37475 remap
[i
+ nelt
] = i
+ nelt2
;
37476 dremap
.perm
[i
] = i
+ nelt2
;
37477 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
37482 dremap
.vmode
= V2DImode
;
37484 dremap
.perm
[0] = 1;
37485 dremap
.perm
[1] = 2;
37493 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
37494 unsigned HOST_WIDE_INT q
[8];
37495 unsigned int nonzero_halves
[4];
37497 /* Split the two input vectors into 8 quarters. */
37498 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
37499 for (i
= 1; i
< 8; ++i
)
37500 q
[i
] = q
[0] << (nelt4
* i
);
37501 for (i
= 0; i
< 4; ++i
)
37502 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
37504 nonzero_halves
[nzcnt
] = i
;
37510 gcc_assert (d
->one_operand_p
);
37511 nonzero_halves
[1] = nonzero_halves
[0];
37512 same_halves
= true;
37514 else if (d
->one_operand_p
)
37516 gcc_assert (nonzero_halves
[0] == 0);
37517 gcc_assert (nonzero_halves
[1] == 1);
37522 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
37524 /* Attempt to increase the likelihood that dfinal
37525 shuffle will be intra-lane. */
37526 char tmph
= nonzero_halves
[0];
37527 nonzero_halves
[0] = nonzero_halves
[1];
37528 nonzero_halves
[1] = tmph
;
37531 /* vperm2f128 or vperm2i128. */
37532 for (i
= 0; i
< nelt2
; ++i
)
37534 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
37535 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
37536 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
37537 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
37540 if (d
->vmode
!= V8SFmode
37541 && d
->vmode
!= V4DFmode
37542 && d
->vmode
!= V8SImode
)
37544 dremap
.vmode
= V8SImode
;
37546 for (i
= 0; i
< 4; ++i
)
37548 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
37549 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
37553 else if (d
->one_operand_p
)
37555 else if (TARGET_AVX2
37556 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
37559 for (i
= 0; i
< nelt4
; ++i
)
37562 remap
[i
+ nelt
] = i
* 2 + 1;
37563 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
37564 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
37565 dremap
.perm
[i
* 2] = i
;
37566 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
37567 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
37568 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
37571 else if (TARGET_AVX2
37572 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
37575 for (i
= 0; i
< nelt4
; ++i
)
37577 remap
[i
+ nelt4
] = i
* 2;
37578 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
37579 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
37580 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
37581 dremap
.perm
[i
* 2] = i
+ nelt4
;
37582 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
37583 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
37584 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
37591 /* Use the remapping array set up above to move the elements from their
37592 swizzled locations into their final destinations. */
37594 for (i
= 0; i
< nelt
; ++i
)
37596 unsigned e
= remap
[d
->perm
[i
]];
37597 gcc_assert (e
< nelt
);
37598 /* If same_halves is true, both halves of the remapped vector are the
37599 same. Avoid cross-lane accesses if possible. */
37600 if (same_halves
&& i
>= nelt2
)
37602 gcc_assert (e
< nelt2
);
37603 dfinal
.perm
[i
] = e
+ nelt2
;
37606 dfinal
.perm
[i
] = e
;
37608 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
37609 dfinal
.op1
= dfinal
.op0
;
37610 dfinal
.one_operand_p
= true;
37611 dremap
.target
= dfinal
.op0
;
37613 /* Test if the final remap can be done with a single insn. For V4SFmode or
37614 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
37616 ok
= expand_vec_perm_1 (&dfinal
);
37617 seq
= get_insns ();
37626 if (dremap
.vmode
!= dfinal
.vmode
)
37628 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
37629 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
37630 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
37633 ok
= expand_vec_perm_1 (&dremap
);
37640 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37641 a single vector cross-lane permutation into vpermq followed
37642 by any of the single insn permutations. */
37645 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
37647 struct expand_vec_perm_d dremap
, dfinal
;
37648 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
37649 unsigned contents
[2];
37653 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
37654 && d
->one_operand_p
))
37659 for (i
= 0; i
< nelt2
; ++i
)
37661 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
37662 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
37665 for (i
= 0; i
< 2; ++i
)
37667 unsigned int cnt
= 0;
37668 for (j
= 0; j
< 4; ++j
)
37669 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
37677 dremap
.vmode
= V4DImode
;
37679 dremap
.target
= gen_reg_rtx (V4DImode
);
37680 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
37681 dremap
.op1
= dremap
.op0
;
37682 dremap
.one_operand_p
= true;
37683 for (i
= 0; i
< 2; ++i
)
37685 unsigned int cnt
= 0;
37686 for (j
= 0; j
< 4; ++j
)
37687 if ((contents
[i
] & (1u << j
)) != 0)
37688 dremap
.perm
[2 * i
+ cnt
++] = j
;
37689 for (; cnt
< 2; ++cnt
)
37690 dremap
.perm
[2 * i
+ cnt
] = 0;
37694 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
37695 dfinal
.op1
= dfinal
.op0
;
37696 dfinal
.one_operand_p
= true;
37697 for (i
= 0, j
= 0; i
< nelt
; ++i
)
37701 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
37702 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
37704 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
37705 dfinal
.perm
[i
] |= nelt4
;
37707 gcc_unreachable ();
37710 ok
= expand_vec_perm_1 (&dremap
);
37713 ok
= expand_vec_perm_1 (&dfinal
);
37719 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
37720 a vector permutation using two instructions, vperm2f128 resp.
37721 vperm2i128 followed by any single in-lane permutation. */
37724 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
37726 struct expand_vec_perm_d dfirst
, dsecond
;
37727 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
37731 || GET_MODE_SIZE (d
->vmode
) != 32
37732 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
37736 dsecond
.one_operand_p
= false;
37737 dsecond
.testing_p
= true;
37739 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
37740 immediate. For perm < 16 the second permutation uses
37741 d->op0 as first operand, for perm >= 16 it uses d->op1
37742 as first operand. The second operand is the result of
37744 for (perm
= 0; perm
< 32; perm
++)
37746 /* Ignore permutations which do not move anything cross-lane. */
37749 /* The second shuffle for e.g. V4DFmode has
37750 0123 and ABCD operands.
37751 Ignore AB23, as 23 is already in the second lane
37752 of the first operand. */
37753 if ((perm
& 0xc) == (1 << 2)) continue;
37754 /* And 01CD, as 01 is in the first lane of the first
37756 if ((perm
& 3) == 0) continue;
37757 /* And 4567, as then the vperm2[fi]128 doesn't change
37758 anything on the original 4567 second operand. */
37759 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
37763 /* The second shuffle for e.g. V4DFmode has
37764 4567 and ABCD operands.
37765 Ignore AB67, as 67 is already in the second lane
37766 of the first operand. */
37767 if ((perm
& 0xc) == (3 << 2)) continue;
37768 /* And 45CD, as 45 is in the first lane of the first
37770 if ((perm
& 3) == 2) continue;
37771 /* And 0123, as then the vperm2[fi]128 doesn't change
37772 anything on the original 0123 first operand. */
37773 if ((perm
& 0xf) == (1 << 2)) continue;
37776 for (i
= 0; i
< nelt
; i
++)
37778 j
= d
->perm
[i
] / nelt2
;
37779 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
37780 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
37781 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
37782 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
37790 ok
= expand_vec_perm_1 (&dsecond
);
37801 /* Found a usable second shuffle. dfirst will be
37802 vperm2f128 on d->op0 and d->op1. */
37803 dsecond
.testing_p
= false;
37805 dfirst
.target
= gen_reg_rtx (d
->vmode
);
37806 for (i
= 0; i
< nelt
; i
++)
37807 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
37808 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
37810 ok
= expand_vec_perm_1 (&dfirst
);
37813 /* And dsecond is some single insn shuffle, taking
37814 d->op0 and result of vperm2f128 (if perm < 16) or
37815 d->op1 and result of vperm2f128 (otherwise). */
37816 dsecond
.op1
= dfirst
.target
;
37818 dsecond
.op0
= dfirst
.op1
;
37820 ok
= expand_vec_perm_1 (&dsecond
);
37826 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
37827 if (d
->one_operand_p
)
37834 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37835 a two vector permutation using 2 intra-lane interleave insns
37836 and cross-lane shuffle for 32-byte vectors. */
37839 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
37842 rtx (*gen
) (rtx
, rtx
, rtx
);
37844 if (d
->one_operand_p
)
37846 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
37848 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
37854 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
37856 for (i
= 0; i
< nelt
; i
+= 2)
37857 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
37858 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
37868 gen
= gen_vec_interleave_highv32qi
;
37870 gen
= gen_vec_interleave_lowv32qi
;
37874 gen
= gen_vec_interleave_highv16hi
;
37876 gen
= gen_vec_interleave_lowv16hi
;
37880 gen
= gen_vec_interleave_highv8si
;
37882 gen
= gen_vec_interleave_lowv8si
;
37886 gen
= gen_vec_interleave_highv4di
;
37888 gen
= gen_vec_interleave_lowv4di
;
37892 gen
= gen_vec_interleave_highv8sf
;
37894 gen
= gen_vec_interleave_lowv8sf
;
37898 gen
= gen_vec_interleave_highv4df
;
37900 gen
= gen_vec_interleave_lowv4df
;
37903 gcc_unreachable ();
37906 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
37910 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
37911 a single vector permutation using a single intra-lane vector
37912 permutation, vperm2f128 swapping the lanes and vblend* insn blending
37913 the non-swapped and swapped vectors together. */
37916 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
37918 struct expand_vec_perm_d dfirst
, dsecond
;
37919 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
37922 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
37926 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
37927 || !d
->one_operand_p
)
37931 for (i
= 0; i
< nelt
; i
++)
37932 dfirst
.perm
[i
] = 0xff;
37933 for (i
= 0, msk
= 0; i
< nelt
; i
++)
37935 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
37936 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
37938 dfirst
.perm
[j
] = d
->perm
[i
];
37942 for (i
= 0; i
< nelt
; i
++)
37943 if (dfirst
.perm
[i
] == 0xff)
37944 dfirst
.perm
[i
] = i
;
37947 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
37950 ok
= expand_vec_perm_1 (&dfirst
);
37951 seq
= get_insns ();
37963 dsecond
.op0
= dfirst
.target
;
37964 dsecond
.op1
= dfirst
.target
;
37965 dsecond
.one_operand_p
= true;
37966 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
37967 for (i
= 0; i
< nelt
; i
++)
37968 dsecond
.perm
[i
] = i
^ nelt2
;
37970 ok
= expand_vec_perm_1 (&dsecond
);
37973 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
37974 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
37978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
37979 permutation using two vperm2f128, followed by a vshufpd insn blending
37980 the two vectors together. */
37983 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
37985 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
37988 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
37998 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
37999 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
38000 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
38001 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
38002 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
38003 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
38004 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
38005 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
38006 dthird
.perm
[0] = (d
->perm
[0] % 2);
38007 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
38008 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
38009 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
38011 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
38012 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
38013 dthird
.op0
= dfirst
.target
;
38014 dthird
.op1
= dsecond
.target
;
38015 dthird
.one_operand_p
= false;
38017 canonicalize_perm (&dfirst
);
38018 canonicalize_perm (&dsecond
);
38020 ok
= expand_vec_perm_1 (&dfirst
)
38021 && expand_vec_perm_1 (&dsecond
)
38022 && expand_vec_perm_1 (&dthird
);
38029 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
38030 permutation with two pshufb insns and an ior. We should have already
38031 failed all two instruction sequences. */
38034 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
38036 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
38037 unsigned int i
, nelt
, eltsz
;
38039 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
38041 gcc_assert (!d
->one_operand_p
);
38044 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38046 /* Generate two permutation masks. If the required element is within
38047 the given vector it is shuffled into the proper lane. If the required
38048 element is in the other vector, force a zero into the lane by setting
38049 bit 7 in the permutation mask. */
38050 m128
= GEN_INT (-128);
38051 for (i
= 0; i
< nelt
; ++i
)
38053 unsigned j
, e
= d
->perm
[i
];
38054 unsigned which
= (e
>= nelt
);
38058 for (j
= 0; j
< eltsz
; ++j
)
38060 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
38061 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
38065 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
38066 vperm
= force_reg (V16QImode
, vperm
);
38068 l
= gen_reg_rtx (V16QImode
);
38069 op
= gen_lowpart (V16QImode
, d
->op0
);
38070 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
38072 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
38073 vperm
= force_reg (V16QImode
, vperm
);
38075 h
= gen_reg_rtx (V16QImode
);
38076 op
= gen_lowpart (V16QImode
, d
->op1
);
38077 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
38079 op
= gen_lowpart (V16QImode
, d
->target
);
38080 emit_insn (gen_iorv16qi3 (op
, l
, h
));
38085 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
38086 with two vpshufb insns, vpermq and vpor. We should have already failed
38087 all two or three instruction sequences. */
38090 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
38092 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
38093 unsigned int i
, nelt
, eltsz
;
38096 || !d
->one_operand_p
38097 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38104 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38106 /* Generate two permutation masks. If the required element is within
38107 the same lane, it is shuffled in. If the required element from the
38108 other lane, force a zero by setting bit 7 in the permutation mask.
38109 In the other mask the mask has non-negative elements if element
38110 is requested from the other lane, but also moved to the other lane,
38111 so that the result of vpshufb can have the two V2TImode halves
38113 m128
= GEN_INT (-128);
38114 for (i
= 0; i
< nelt
; ++i
)
38116 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38117 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
38119 for (j
= 0; j
< eltsz
; ++j
)
38121 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
38122 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
38126 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
38127 vperm
= force_reg (V32QImode
, vperm
);
38129 h
= gen_reg_rtx (V32QImode
);
38130 op
= gen_lowpart (V32QImode
, d
->op0
);
38131 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
38133 /* Swap the 128-byte lanes of h into hp. */
38134 hp
= gen_reg_rtx (V4DImode
);
38135 op
= gen_lowpart (V4DImode
, h
);
38136 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
38139 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
38140 vperm
= force_reg (V32QImode
, vperm
);
38142 l
= gen_reg_rtx (V32QImode
);
38143 op
= gen_lowpart (V32QImode
, d
->op0
);
38144 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
38146 op
= gen_lowpart (V32QImode
, d
->target
);
38147 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
38152 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
38153 and extract-odd permutations of two V32QImode and V16QImode operand
38154 with two vpshufb insns, vpor and vpermq. We should have already
38155 failed all two or three instruction sequences. */
38158 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
38160 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
38161 unsigned int i
, nelt
, eltsz
;
38164 || d
->one_operand_p
38165 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38168 for (i
= 0; i
< d
->nelt
; ++i
)
38169 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
38176 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38178 /* Generate two permutation masks. In the first permutation mask
38179 the first quarter will contain indexes for the first half
38180 of the op0, the second quarter will contain bit 7 set, third quarter
38181 will contain indexes for the second half of the op0 and the
38182 last quarter bit 7 set. In the second permutation mask
38183 the first quarter will contain bit 7 set, the second quarter
38184 indexes for the first half of the op1, the third quarter bit 7 set
38185 and last quarter indexes for the second half of the op1.
38186 I.e. the first mask e.g. for V32QImode extract even will be:
38187 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
38188 (all values masked with 0xf except for -128) and second mask
38189 for extract even will be
38190 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
38191 m128
= GEN_INT (-128);
38192 for (i
= 0; i
< nelt
; ++i
)
38194 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38195 unsigned which
= d
->perm
[i
] >= nelt
;
38196 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
38198 for (j
= 0; j
< eltsz
; ++j
)
38200 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
38201 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
38205 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
38206 vperm
= force_reg (V32QImode
, vperm
);
38208 l
= gen_reg_rtx (V32QImode
);
38209 op
= gen_lowpart (V32QImode
, d
->op0
);
38210 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
38212 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
38213 vperm
= force_reg (V32QImode
, vperm
);
38215 h
= gen_reg_rtx (V32QImode
);
38216 op
= gen_lowpart (V32QImode
, d
->op1
);
38217 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
38219 ior
= gen_reg_rtx (V32QImode
);
38220 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
38222 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
38223 op
= gen_lowpart (V4DImode
, d
->target
);
38224 ior
= gen_lowpart (V4DImode
, ior
);
38225 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
38226 const1_rtx
, GEN_INT (3)));
38231 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
38232 and extract-odd permutations. */
38235 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
38242 t1
= gen_reg_rtx (V4DFmode
);
38243 t2
= gen_reg_rtx (V4DFmode
);
38245 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
38246 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
38247 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
38249 /* Now an unpck[lh]pd will produce the result required. */
38251 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
38253 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
38259 int mask
= odd
? 0xdd : 0x88;
38261 t1
= gen_reg_rtx (V8SFmode
);
38262 t2
= gen_reg_rtx (V8SFmode
);
38263 t3
= gen_reg_rtx (V8SFmode
);
38265 /* Shuffle within the 128-bit lanes to produce:
38266 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
38267 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
38270 /* Shuffle the lanes around to produce:
38271 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
38272 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
38275 /* Shuffle within the 128-bit lanes to produce:
38276 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
38277 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
38279 /* Shuffle within the 128-bit lanes to produce:
38280 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
38281 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
38283 /* Shuffle the lanes around to produce:
38284 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
38285 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
38294 /* These are always directly implementable by expand_vec_perm_1. */
38295 gcc_unreachable ();
38299 return expand_vec_perm_pshufb2 (d
);
38302 /* We need 2*log2(N)-1 operations to achieve odd/even
38303 with interleave. */
38304 t1
= gen_reg_rtx (V8HImode
);
38305 t2
= gen_reg_rtx (V8HImode
);
38306 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
38307 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
38308 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
38309 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
38311 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
38313 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
38320 return expand_vec_perm_pshufb2 (d
);
38323 t1
= gen_reg_rtx (V16QImode
);
38324 t2
= gen_reg_rtx (V16QImode
);
38325 t3
= gen_reg_rtx (V16QImode
);
38326 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
38327 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
38328 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
38329 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
38330 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
38331 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
38333 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
38335 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
38342 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
38347 struct expand_vec_perm_d d_copy
= *d
;
38348 d_copy
.vmode
= V4DFmode
;
38349 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
38350 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
38351 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
38352 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
38355 t1
= gen_reg_rtx (V4DImode
);
38356 t2
= gen_reg_rtx (V4DImode
);
38358 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
38359 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
38360 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
38362 /* Now an vpunpck[lh]qdq will produce the result required. */
38364 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
38366 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
38373 struct expand_vec_perm_d d_copy
= *d
;
38374 d_copy
.vmode
= V8SFmode
;
38375 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
38376 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
38377 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
38378 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
38381 t1
= gen_reg_rtx (V8SImode
);
38382 t2
= gen_reg_rtx (V8SImode
);
38384 /* Shuffle the lanes around into
38385 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
38386 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
38387 gen_lowpart (V4DImode
, d
->op0
),
38388 gen_lowpart (V4DImode
, d
->op1
),
38390 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
38391 gen_lowpart (V4DImode
, d
->op0
),
38392 gen_lowpart (V4DImode
, d
->op1
),
38395 /* Swap the 2nd and 3rd position in each lane into
38396 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
38397 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
38398 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
38399 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
38400 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
38402 /* Now an vpunpck[lh]qdq will produce
38403 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
38405 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
38406 gen_lowpart (V4DImode
, t1
),
38407 gen_lowpart (V4DImode
, t2
));
38409 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
38410 gen_lowpart (V4DImode
, t1
),
38411 gen_lowpart (V4DImode
, t2
));
38416 gcc_unreachable ();
38422 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
38423 extract-even and extract-odd permutations. */
38426 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
38428 unsigned i
, odd
, nelt
= d
->nelt
;
38431 if (odd
!= 0 && odd
!= 1)
38434 for (i
= 1; i
< nelt
; ++i
)
38435 if (d
->perm
[i
] != 2 * i
+ odd
)
38438 return expand_vec_perm_even_odd_1 (d
, odd
);
38441 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
38442 permutations. We assume that expand_vec_perm_1 has already failed. */
38445 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
38447 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
38448 enum machine_mode vmode
= d
->vmode
;
38449 unsigned char perm2
[4];
38457 /* These are special-cased in sse.md so that we can optionally
38458 use the vbroadcast instruction. They expand to two insns
38459 if the input happens to be in a register. */
38460 gcc_unreachable ();
38466 /* These are always implementable using standard shuffle patterns. */
38467 gcc_unreachable ();
38471 /* These can be implemented via interleave. We save one insn by
38472 stopping once we have promoted to V4SImode and then use pshufd. */
38476 rtx (*gen
) (rtx
, rtx
, rtx
)
38477 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
38478 : gen_vec_interleave_lowv8hi
;
38482 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
38483 : gen_vec_interleave_highv8hi
;
38488 dest
= gen_reg_rtx (vmode
);
38489 emit_insn (gen (dest
, op0
, op0
));
38490 vmode
= get_mode_wider_vector (vmode
);
38491 op0
= gen_lowpart (vmode
, dest
);
38493 while (vmode
!= V4SImode
);
38495 memset (perm2
, elt
, 4);
38496 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
38505 /* For AVX2 broadcasts of the first element vpbroadcast* or
38506 vpermq should be used by expand_vec_perm_1. */
38507 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
38511 gcc_unreachable ();
38515 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
38516 broadcast permutations. */
38519 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
38521 unsigned i
, elt
, nelt
= d
->nelt
;
38523 if (!d
->one_operand_p
)
38527 for (i
= 1; i
< nelt
; ++i
)
38528 if (d
->perm
[i
] != elt
)
38531 return expand_vec_perm_broadcast_1 (d
);
38534 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
38535 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
38536 all the shorter instruction sequences. */
38539 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
38541 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
38542 unsigned int i
, nelt
, eltsz
;
38546 || d
->one_operand_p
38547 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38554 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38556 /* Generate 4 permutation masks. If the required element is within
38557 the same lane, it is shuffled in. If the required element from the
38558 other lane, force a zero by setting bit 7 in the permutation mask.
38559 In the other mask the mask has non-negative elements if element
38560 is requested from the other lane, but also moved to the other lane,
38561 so that the result of vpshufb can have the two V2TImode halves
38563 m128
= GEN_INT (-128);
38564 for (i
= 0; i
< 32; ++i
)
38566 rperm
[0][i
] = m128
;
38567 rperm
[1][i
] = m128
;
38568 rperm
[2][i
] = m128
;
38569 rperm
[3][i
] = m128
;
38575 for (i
= 0; i
< nelt
; ++i
)
38577 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38578 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
38579 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
38581 for (j
= 0; j
< eltsz
; ++j
)
38582 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
38583 used
[which
] = true;
38586 for (i
= 0; i
< 2; ++i
)
38588 if (!used
[2 * i
+ 1])
38593 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
38594 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
38595 vperm
= force_reg (V32QImode
, vperm
);
38596 h
[i
] = gen_reg_rtx (V32QImode
);
38597 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
38598 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
38601 /* Swap the 128-byte lanes of h[X]. */
38602 for (i
= 0; i
< 2; ++i
)
38604 if (h
[i
] == NULL_RTX
)
38606 op
= gen_reg_rtx (V4DImode
);
38607 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
38608 const2_rtx
, GEN_INT (3), const0_rtx
,
38610 h
[i
] = gen_lowpart (V32QImode
, op
);
38613 for (i
= 0; i
< 2; ++i
)
38620 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
38621 vperm
= force_reg (V32QImode
, vperm
);
38622 l
[i
] = gen_reg_rtx (V32QImode
);
38623 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
38624 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
38627 for (i
= 0; i
< 2; ++i
)
38631 op
= gen_reg_rtx (V32QImode
);
38632 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
38639 gcc_assert (l
[0] && l
[1]);
38640 op
= gen_lowpart (V32QImode
, d
->target
);
38641 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
38645 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
38646 With all of the interface bits taken care of, perform the expansion
38647 in D and return true on success. */
38650 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
38652 /* Try a single instruction expansion. */
38653 if (expand_vec_perm_1 (d
))
38656 /* Try sequences of two instructions. */
38658 if (expand_vec_perm_pshuflw_pshufhw (d
))
38661 if (expand_vec_perm_palignr (d
))
38664 if (expand_vec_perm_interleave2 (d
))
38667 if (expand_vec_perm_broadcast (d
))
38670 if (expand_vec_perm_vpermq_perm_1 (d
))
38673 if (expand_vec_perm_vperm2f128 (d
))
38676 /* Try sequences of three instructions. */
38678 if (expand_vec_perm_2vperm2f128_vshuf (d
))
38681 if (expand_vec_perm_pshufb2 (d
))
38684 if (expand_vec_perm_interleave3 (d
))
38687 if (expand_vec_perm_vperm2f128_vblend (d
))
38690 /* Try sequences of four instructions. */
38692 if (expand_vec_perm_vpshufb2_vpermq (d
))
38695 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
38698 /* ??? Look for narrow permutations whose element orderings would
38699 allow the promotion to a wider mode. */
38701 /* ??? Look for sequences of interleave or a wider permute that place
38702 the data into the correct lanes for a half-vector shuffle like
38703 pshuf[lh]w or vpermilps. */
38705 /* ??? Look for sequences of interleave that produce the desired results.
38706 The combinatorics of punpck[lh] get pretty ugly... */
38708 if (expand_vec_perm_even_odd (d
))
38711 /* Even longer sequences. */
38712 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
38718 /* If a permutation only uses one operand, make it clear. Returns true
38719 if the permutation references both operands. */
38722 canonicalize_perm (struct expand_vec_perm_d
*d
)
38724 int i
, which
, nelt
= d
->nelt
;
38726 for (i
= which
= 0; i
< nelt
; ++i
)
38727 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
38729 d
->one_operand_p
= true;
38736 if (!rtx_equal_p (d
->op0
, d
->op1
))
38738 d
->one_operand_p
= false;
38741 /* The elements of PERM do not suggest that only the first operand
38742 is used, but both operands are identical. Allow easier matching
38743 of the permutation by folding the permutation into the single
38748 for (i
= 0; i
< nelt
; ++i
)
38749 d
->perm
[i
] &= nelt
- 1;
38758 return (which
== 3);
38762 ix86_expand_vec_perm_const (rtx operands
[4])
38764 struct expand_vec_perm_d d
;
38765 unsigned char perm
[MAX_VECT_LEN
];
38770 d
.target
= operands
[0];
38771 d
.op0
= operands
[1];
38772 d
.op1
= operands
[2];
38775 d
.vmode
= GET_MODE (d
.target
);
38776 gcc_assert (VECTOR_MODE_P (d
.vmode
));
38777 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38778 d
.testing_p
= false;
38780 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
38781 gcc_assert (XVECLEN (sel
, 0) == nelt
);
38782 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
38784 for (i
= 0; i
< nelt
; ++i
)
38786 rtx e
= XVECEXP (sel
, 0, i
);
38787 int ei
= INTVAL (e
) & (2 * nelt
- 1);
38792 two_args
= canonicalize_perm (&d
);
38794 if (ix86_expand_vec_perm_const_1 (&d
))
38797 /* If the selector says both arguments are needed, but the operands are the
38798 same, the above tried to expand with one_operand_p and flattened selector.
38799 If that didn't work, retry without one_operand_p; we succeeded with that
38801 if (two_args
&& d
.one_operand_p
)
38803 d
.one_operand_p
= false;
38804 memcpy (d
.perm
, perm
, sizeof (perm
));
38805 return ix86_expand_vec_perm_const_1 (&d
);
38811 /* Implement targetm.vectorize.vec_perm_const_ok. */
38814 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
38815 const unsigned char *sel
)
38817 struct expand_vec_perm_d d
;
38818 unsigned int i
, nelt
, which
;
38822 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38823 d
.testing_p
= true;
38825 /* Given sufficient ISA support we can just return true here
38826 for selected vector modes. */
38827 if (GET_MODE_SIZE (d
.vmode
) == 16)
38829 /* All implementable with a single vpperm insn. */
38832 /* All implementable with 2 pshufb + 1 ior. */
38835 /* All implementable with shufpd or unpck[lh]pd. */
38840 /* Extract the values from the vector CST into the permutation
38842 memcpy (d
.perm
, sel
, nelt
);
38843 for (i
= which
= 0; i
< nelt
; ++i
)
38845 unsigned char e
= d
.perm
[i
];
38846 gcc_assert (e
< 2 * nelt
);
38847 which
|= (e
< nelt
? 1 : 2);
38850 /* For all elements from second vector, fold the elements to first. */
38852 for (i
= 0; i
< nelt
; ++i
)
38855 /* Check whether the mask can be applied to the vector type. */
38856 d
.one_operand_p
= (which
!= 3);
38858 /* Implementable with shufps or pshufd. */
38859 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
38862 /* Otherwise we have to go through the motions and see if we can
38863 figure out how to generate the requested permutation. */
38864 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
38865 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
38866 if (!d
.one_operand_p
)
38867 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
38870 ret
= ix86_expand_vec_perm_const_1 (&d
);
38877 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
38879 struct expand_vec_perm_d d
;
38885 d
.vmode
= GET_MODE (targ
);
38886 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38887 d
.one_operand_p
= false;
38888 d
.testing_p
= false;
38890 for (i
= 0; i
< nelt
; ++i
)
38891 d
.perm
[i
] = i
* 2 + odd
;
38893 /* We'll either be able to implement the permutation directly... */
38894 if (expand_vec_perm_1 (&d
))
38897 /* ... or we use the special-case patterns. */
38898 expand_vec_perm_even_odd_1 (&d
, odd
);
38902 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
38904 struct expand_vec_perm_d d
;
38905 unsigned i
, nelt
, base
;
38911 d
.vmode
= GET_MODE (targ
);
38912 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38913 d
.one_operand_p
= false;
38914 d
.testing_p
= false;
38916 base
= high_p
? nelt
/ 2 : 0;
38917 for (i
= 0; i
< nelt
/ 2; ++i
)
38919 d
.perm
[i
* 2] = i
+ base
;
38920 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
38923 /* Note that for AVX this isn't one instruction. */
38924 ok
= ix86_expand_vec_perm_const_1 (&d
);
38929 /* Expand a vector operation CODE for a V*QImode in terms of the
38930 same operation on V*HImode. */
38933 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
38935 enum machine_mode qimode
= GET_MODE (dest
);
38936 enum machine_mode himode
;
38937 rtx (*gen_il
) (rtx
, rtx
, rtx
);
38938 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
38939 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
38940 struct expand_vec_perm_d d
;
38941 bool ok
, full_interleave
;
38942 bool uns_p
= false;
38949 gen_il
= gen_vec_interleave_lowv16qi
;
38950 gen_ih
= gen_vec_interleave_highv16qi
;
38953 himode
= V16HImode
;
38954 gen_il
= gen_avx2_interleave_lowv32qi
;
38955 gen_ih
= gen_avx2_interleave_highv32qi
;
38958 gcc_unreachable ();
38961 op2_l
= op2_h
= op2
;
38965 /* Unpack data such that we've got a source byte in each low byte of
38966 each word. We don't care what goes into the high byte of each word.
38967 Rather than trying to get zero in there, most convenient is to let
38968 it be a copy of the low byte. */
38969 op2_l
= gen_reg_rtx (qimode
);
38970 op2_h
= gen_reg_rtx (qimode
);
38971 emit_insn (gen_il (op2_l
, op2
, op2
));
38972 emit_insn (gen_ih (op2_h
, op2
, op2
));
38975 op1_l
= gen_reg_rtx (qimode
);
38976 op1_h
= gen_reg_rtx (qimode
);
38977 emit_insn (gen_il (op1_l
, op1
, op1
));
38978 emit_insn (gen_ih (op1_h
, op1
, op1
));
38979 full_interleave
= qimode
== V16QImode
;
38987 op1_l
= gen_reg_rtx (himode
);
38988 op1_h
= gen_reg_rtx (himode
);
38989 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
38990 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
38991 full_interleave
= true;
38994 gcc_unreachable ();
38997 /* Perform the operation. */
38998 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
39000 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
39002 gcc_assert (res_l
&& res_h
);
39004 /* Merge the data back into the right place. */
39006 d
.op0
= gen_lowpart (qimode
, res_l
);
39007 d
.op1
= gen_lowpart (qimode
, res_h
);
39009 d
.nelt
= GET_MODE_NUNITS (qimode
);
39010 d
.one_operand_p
= false;
39011 d
.testing_p
= false;
39013 if (full_interleave
)
39015 /* For SSE2, we used an full interleave, so the desired
39016 results are in the even elements. */
39017 for (i
= 0; i
< 32; ++i
)
39022 /* For AVX, the interleave used above was not cross-lane. So the
39023 extraction is evens but with the second and third quarter swapped.
39024 Happily, that is even one insn shorter than even extraction. */
39025 for (i
= 0; i
< 32; ++i
)
39026 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
39029 ok
= ix86_expand_vec_perm_const_1 (&d
);
39032 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
39033 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
39037 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
39038 bool uns_p
, bool odd_p
)
39040 enum machine_mode mode
= GET_MODE (op1
);
39041 enum machine_mode wmode
= GET_MODE (dest
);
39044 /* We only play even/odd games with vectors of SImode. */
39045 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
39047 /* If we're looking for the odd results, shift those members down to
39048 the even slots. For some cpus this is faster than a PSHUFD. */
39051 if (TARGET_XOP
&& mode
== V4SImode
)
39053 x
= force_reg (wmode
, CONST0_RTX (wmode
));
39054 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
39058 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
39059 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
39060 x
, NULL
, 1, OPTAB_DIRECT
);
39061 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
39062 x
, NULL
, 1, OPTAB_DIRECT
);
39063 op1
= gen_lowpart (mode
, op1
);
39064 op2
= gen_lowpart (mode
, op2
);
39067 if (mode
== V8SImode
)
39070 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
39072 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
39075 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
39076 else if (TARGET_SSE4_1
)
39077 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
39080 rtx s1
, s2
, t0
, t1
, t2
;
39082 /* The easiest way to implement this without PMULDQ is to go through
39083 the motions as if we are performing a full 64-bit multiply. With
39084 the exception that we need to do less shuffling of the elements. */
39086 /* Compute the sign-extension, aka highparts, of the two operands. */
39087 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
39088 op1
, pc_rtx
, pc_rtx
);
39089 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
39090 op2
, pc_rtx
, pc_rtx
);
39092 /* Multiply LO(A) * HI(B), and vice-versa. */
39093 t1
= gen_reg_rtx (wmode
);
39094 t2
= gen_reg_rtx (wmode
);
39095 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
39096 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
39098 /* Multiply LO(A) * LO(B). */
39099 t0
= gen_reg_rtx (wmode
);
39100 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
39102 /* Combine and shift the highparts into place. */
39103 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
39104 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
39107 /* Combine high and low parts. */
39108 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
39115 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
39116 bool uns_p
, bool high_p
)
39118 enum machine_mode wmode
= GET_MODE (dest
);
39119 enum machine_mode mode
= GET_MODE (op1
);
39120 rtx t1
, t2
, t3
, t4
, mask
;
39125 t1
= gen_reg_rtx (mode
);
39126 t2
= gen_reg_rtx (mode
);
39127 if (TARGET_XOP
&& !uns_p
)
39129 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
39130 shuffle the elements once so that all elements are in the right
39131 place for immediate use: { A C B D }. */
39132 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
39133 const1_rtx
, GEN_INT (3)));
39134 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
39135 const1_rtx
, GEN_INT (3)));
39139 /* Put the elements into place for the multiply. */
39140 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
39141 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
39144 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
39148 /* Shuffle the elements between the lanes. After this we
39149 have { A B E F | C D G H } for each operand. */
39150 t1
= gen_reg_rtx (V4DImode
);
39151 t2
= gen_reg_rtx (V4DImode
);
39152 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
39153 const0_rtx
, const2_rtx
,
39154 const1_rtx
, GEN_INT (3)));
39155 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
39156 const0_rtx
, const2_rtx
,
39157 const1_rtx
, GEN_INT (3)));
39159 /* Shuffle the elements within the lanes. After this we
39160 have { A A B B | C C D D } or { E E F F | G G H H }. */
39161 t3
= gen_reg_rtx (V8SImode
);
39162 t4
= gen_reg_rtx (V8SImode
);
39163 mask
= GEN_INT (high_p
39164 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
39165 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
39166 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
39167 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
39169 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
39174 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
39175 uns_p
, OPTAB_DIRECT
);
39176 t2
= expand_binop (mode
,
39177 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
39178 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
39179 gcc_assert (t1
&& t2
);
39181 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
39186 t1
= gen_reg_rtx (wmode
);
39187 t2
= gen_reg_rtx (wmode
);
39188 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
39189 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
39191 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
39195 gcc_unreachable ();
39200 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
39204 res_1
= gen_reg_rtx (V4SImode
);
39205 res_2
= gen_reg_rtx (V4SImode
);
39206 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
39207 op1
, op2
, true, false);
39208 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
39209 op1
, op2
, true, true);
39211 /* Move the results in element 2 down to element 1; we don't care
39212 what goes in elements 2 and 3. Then we can merge the parts
39213 back together with an interleave.
39215 Note that two other sequences were tried:
39216 (1) Use interleaves at the start instead of psrldq, which allows
39217 us to use a single shufps to merge things back at the end.
39218 (2) Use shufps here to combine the two vectors, then pshufd to
39219 put the elements in the correct order.
39220 In both cases the cost of the reformatting stall was too high
39221 and the overall sequence slower. */
39223 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
39224 const0_rtx
, const0_rtx
));
39225 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
39226 const0_rtx
, const0_rtx
));
39227 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
39229 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
39233 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
39235 enum machine_mode mode
= GET_MODE (op0
);
39236 rtx t1
, t2
, t3
, t4
, t5
, t6
;
39238 if (TARGET_XOP
&& mode
== V2DImode
)
39240 /* op1: A,B,C,D, op2: E,F,G,H */
39241 op1
= gen_lowpart (V4SImode
, op1
);
39242 op2
= gen_lowpart (V4SImode
, op2
);
39244 t1
= gen_reg_rtx (V4SImode
);
39245 t2
= gen_reg_rtx (V4SImode
);
39246 t3
= gen_reg_rtx (V2DImode
);
39247 t4
= gen_reg_rtx (V2DImode
);
39250 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
39256 /* t2: (B*E),(A*F),(D*G),(C*H) */
39257 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
39259 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
39260 emit_insn (gen_xop_phadddq (t3
, t2
));
39262 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
39263 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
39265 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
39266 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
39270 enum machine_mode nmode
;
39271 rtx (*umul
) (rtx
, rtx
, rtx
);
39273 if (mode
== V2DImode
)
39275 umul
= gen_vec_widen_umult_even_v4si
;
39278 else if (mode
== V4DImode
)
39280 umul
= gen_vec_widen_umult_even_v8si
;
39284 gcc_unreachable ();
39287 /* Multiply low parts. */
39288 t1
= gen_reg_rtx (mode
);
39289 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
39291 /* Shift input vectors right 32 bits so we can multiply high parts. */
39293 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
39294 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
39296 /* Multiply high parts by low parts. */
39297 t4
= gen_reg_rtx (mode
);
39298 t5
= gen_reg_rtx (mode
);
39299 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
39300 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
39302 /* Combine and shift the highparts back. */
39303 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
39304 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
39306 /* Combine high and low parts. */
39307 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
39310 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
39311 gen_rtx_MULT (mode
, op1
, op2
));
39314 /* Expand an insert into a vector register through pinsr insn.
39315 Return true if successful. */
39318 ix86_expand_pinsr (rtx
*operands
)
39320 rtx dst
= operands
[0];
39321 rtx src
= operands
[3];
39323 unsigned int size
= INTVAL (operands
[1]);
39324 unsigned int pos
= INTVAL (operands
[2]);
39326 if (GET_CODE (dst
) == SUBREG
)
39328 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
39329 dst
= SUBREG_REG (dst
);
39332 if (GET_CODE (src
) == SUBREG
)
39333 src
= SUBREG_REG (src
);
39335 switch (GET_MODE (dst
))
39342 enum machine_mode srcmode
, dstmode
;
39343 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
39345 srcmode
= mode_for_size (size
, MODE_INT
, 0);
39350 if (!TARGET_SSE4_1
)
39352 dstmode
= V16QImode
;
39353 pinsr
= gen_sse4_1_pinsrb
;
39359 dstmode
= V8HImode
;
39360 pinsr
= gen_sse2_pinsrw
;
39364 if (!TARGET_SSE4_1
)
39366 dstmode
= V4SImode
;
39367 pinsr
= gen_sse4_1_pinsrd
;
39371 gcc_assert (TARGET_64BIT
);
39372 if (!TARGET_SSE4_1
)
39374 dstmode
= V2DImode
;
39375 pinsr
= gen_sse4_1_pinsrq
;
39382 dst
= gen_lowpart (dstmode
, dst
);
39383 src
= gen_lowpart (srcmode
, src
);
39387 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
39396 /* This function returns the calling abi specific va_list type node.
39397 It returns the FNDECL specific va_list type. */
39400 ix86_fn_abi_va_list (tree fndecl
)
39403 return va_list_type_node
;
39404 gcc_assert (fndecl
!= NULL_TREE
);
39406 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
39407 return ms_va_list_type_node
;
39409 return sysv_va_list_type_node
;
39412 /* Returns the canonical va_list type specified by TYPE. If there
39413 is no valid TYPE provided, it return NULL_TREE. */
39416 ix86_canonical_va_list_type (tree type
)
39420 /* Resolve references and pointers to va_list type. */
39421 if (TREE_CODE (type
) == MEM_REF
)
39422 type
= TREE_TYPE (type
);
39423 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
39424 type
= TREE_TYPE (type
);
39425 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
39426 type
= TREE_TYPE (type
);
39428 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
39430 wtype
= va_list_type_node
;
39431 gcc_assert (wtype
!= NULL_TREE
);
39433 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
39435 /* If va_list is an array type, the argument may have decayed
39436 to a pointer type, e.g. by being passed to another function.
39437 In that case, unwrap both types so that we can compare the
39438 underlying records. */
39439 if (TREE_CODE (htype
) == ARRAY_TYPE
39440 || POINTER_TYPE_P (htype
))
39442 wtype
= TREE_TYPE (wtype
);
39443 htype
= TREE_TYPE (htype
);
39446 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
39447 return va_list_type_node
;
39448 wtype
= sysv_va_list_type_node
;
39449 gcc_assert (wtype
!= NULL_TREE
);
39451 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
39453 /* If va_list is an array type, the argument may have decayed
39454 to a pointer type, e.g. by being passed to another function.
39455 In that case, unwrap both types so that we can compare the
39456 underlying records. */
39457 if (TREE_CODE (htype
) == ARRAY_TYPE
39458 || POINTER_TYPE_P (htype
))
39460 wtype
= TREE_TYPE (wtype
);
39461 htype
= TREE_TYPE (htype
);
39464 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
39465 return sysv_va_list_type_node
;
39466 wtype
= ms_va_list_type_node
;
39467 gcc_assert (wtype
!= NULL_TREE
);
39469 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
39471 /* If va_list is an array type, the argument may have decayed
39472 to a pointer type, e.g. by being passed to another function.
39473 In that case, unwrap both types so that we can compare the
39474 underlying records. */
39475 if (TREE_CODE (htype
) == ARRAY_TYPE
39476 || POINTER_TYPE_P (htype
))
39478 wtype
= TREE_TYPE (wtype
);
39479 htype
= TREE_TYPE (htype
);
39482 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
39483 return ms_va_list_type_node
;
39486 return std_canonical_va_list_type (type
);
39489 /* Iterate through the target-specific builtin types for va_list.
39490 IDX denotes the iterator, *PTREE is set to the result type of
39491 the va_list builtin, and *PNAME to its internal type.
39492 Returns zero if there is no element for this index, otherwise
39493 IDX should be increased upon the next call.
39494 Note, do not iterate a base builtin's name like __builtin_va_list.
39495 Used from c_common_nodes_and_builtins. */
39498 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
39508 *ptree
= ms_va_list_type_node
;
39509 *pname
= "__builtin_ms_va_list";
39513 *ptree
= sysv_va_list_type_node
;
39514 *pname
= "__builtin_sysv_va_list";
39522 #undef TARGET_SCHED_DISPATCH
39523 #define TARGET_SCHED_DISPATCH has_dispatch
39524 #undef TARGET_SCHED_DISPATCH_DO
39525 #define TARGET_SCHED_DISPATCH_DO do_dispatch
39526 #undef TARGET_SCHED_REASSOCIATION_WIDTH
39527 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
39528 #undef TARGET_SCHED_REORDER
39529 #define TARGET_SCHED_REORDER ix86_sched_reorder
39531 /* The size of the dispatch window is the total number of bytes of
39532 object code allowed in a window. */
39533 #define DISPATCH_WINDOW_SIZE 16
39535 /* Number of dispatch windows considered for scheduling. */
39536 #define MAX_DISPATCH_WINDOWS 3
39538 /* Maximum number of instructions in a window. */
39541 /* Maximum number of immediate operands in a window. */
39544 /* Maximum number of immediate bits allowed in a window. */
39545 #define MAX_IMM_SIZE 128
39547 /* Maximum number of 32 bit immediates allowed in a window. */
39548 #define MAX_IMM_32 4
39550 /* Maximum number of 64 bit immediates allowed in a window. */
39551 #define MAX_IMM_64 2
39553 /* Maximum total of loads or prefetches allowed in a window. */
39556 /* Maximum total of stores allowed in a window. */
39557 #define MAX_STORE 1
39563 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
39564 enum dispatch_group
{
39579 /* Number of allowable groups in a dispatch window. It is an array
39580 indexed by dispatch_group enum. 100 is used as a big number,
39581 because the number of these kind of operations does not have any
39582 effect in dispatch window, but we need them for other reasons in
39584 static unsigned int num_allowable_groups
[disp_last
] = {
39585 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
39588 char group_name
[disp_last
+ 1][16] = {
39589 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
39590 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
39591 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
39594 /* Instruction path. */
39597 path_single
, /* Single micro op. */
39598 path_double
, /* Double micro op. */
39599 path_multi
, /* Instructions with more than 2 micro op.. */
39603 /* sched_insn_info defines a window to the instructions scheduled in
39604 the basic block. It contains a pointer to the insn_info table and
39605 the instruction scheduled.
39607 Windows are allocated for each basic block and are linked
39609 typedef struct sched_insn_info_s
{
39611 enum dispatch_group group
;
39612 enum insn_path path
;
39617 /* Linked list of dispatch windows. This is a two way list of
39618 dispatch windows of a basic block. It contains information about
39619 the number of uops in the window and the total number of
39620 instructions and of bytes in the object code for this dispatch
39622 typedef struct dispatch_windows_s
{
39623 int num_insn
; /* Number of insn in the window. */
39624 int num_uops
; /* Number of uops in the window. */
39625 int window_size
; /* Number of bytes in the window. */
39626 int window_num
; /* Window number between 0 or 1. */
39627 int num_imm
; /* Number of immediates in an insn. */
39628 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
39629 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
39630 int imm_size
; /* Total immediates in the window. */
39631 int num_loads
; /* Total memory loads in the window. */
39632 int num_stores
; /* Total memory stores in the window. */
39633 int violation
; /* Violation exists in window. */
39634 sched_insn_info
*window
; /* Pointer to the window. */
39635 struct dispatch_windows_s
*next
;
39636 struct dispatch_windows_s
*prev
;
39637 } dispatch_windows
;
39639 /* Immediate valuse used in an insn. */
39640 typedef struct imm_info_s
39647 static dispatch_windows
*dispatch_window_list
;
39648 static dispatch_windows
*dispatch_window_list1
;
39650 /* Get dispatch group of insn. */
39652 static enum dispatch_group
39653 get_mem_group (rtx insn
)
39655 enum attr_memory memory
;
39657 if (INSN_CODE (insn
) < 0)
39658 return disp_no_group
;
39659 memory
= get_attr_memory (insn
);
39660 if (memory
== MEMORY_STORE
)
39663 if (memory
== MEMORY_LOAD
)
39666 if (memory
== MEMORY_BOTH
)
39667 return disp_load_store
;
39669 return disp_no_group
;
39672 /* Return true if insn is a compare instruction. */
39677 enum attr_type type
;
39679 type
= get_attr_type (insn
);
39680 return (type
== TYPE_TEST
39681 || type
== TYPE_ICMP
39682 || type
== TYPE_FCMP
39683 || GET_CODE (PATTERN (insn
)) == COMPARE
);
39686 /* Return true if a dispatch violation encountered. */
39689 dispatch_violation (void)
39691 if (dispatch_window_list
->next
)
39692 return dispatch_window_list
->next
->violation
;
39693 return dispatch_window_list
->violation
;
39696 /* Return true if insn is a branch instruction. */
39699 is_branch (rtx insn
)
39701 return (CALL_P (insn
) || JUMP_P (insn
));
39704 /* Return true if insn is a prefetch instruction. */
39707 is_prefetch (rtx insn
)
39709 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
39712 /* This function initializes a dispatch window and the list container holding a
39713 pointer to the window. */
39716 init_window (int window_num
)
39719 dispatch_windows
*new_list
;
39721 if (window_num
== 0)
39722 new_list
= dispatch_window_list
;
39724 new_list
= dispatch_window_list1
;
39726 new_list
->num_insn
= 0;
39727 new_list
->num_uops
= 0;
39728 new_list
->window_size
= 0;
39729 new_list
->next
= NULL
;
39730 new_list
->prev
= NULL
;
39731 new_list
->window_num
= window_num
;
39732 new_list
->num_imm
= 0;
39733 new_list
->num_imm_32
= 0;
39734 new_list
->num_imm_64
= 0;
39735 new_list
->imm_size
= 0;
39736 new_list
->num_loads
= 0;
39737 new_list
->num_stores
= 0;
39738 new_list
->violation
= false;
39740 for (i
= 0; i
< MAX_INSN
; i
++)
39742 new_list
->window
[i
].insn
= NULL
;
39743 new_list
->window
[i
].group
= disp_no_group
;
39744 new_list
->window
[i
].path
= no_path
;
39745 new_list
->window
[i
].byte_len
= 0;
39746 new_list
->window
[i
].imm_bytes
= 0;
39751 /* This function allocates and initializes a dispatch window and the
39752 list container holding a pointer to the window. */
39754 static dispatch_windows
*
39755 allocate_window (void)
39757 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
39758 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
39763 /* This routine initializes the dispatch scheduling information. It
39764 initiates building dispatch scheduler tables and constructs the
39765 first dispatch window. */
39768 init_dispatch_sched (void)
39770 /* Allocate a dispatch list and a window. */
39771 dispatch_window_list
= allocate_window ();
39772 dispatch_window_list1
= allocate_window ();
39777 /* This function returns true if a branch is detected. End of a basic block
39778 does not have to be a branch, but here we assume only branches end a
39782 is_end_basic_block (enum dispatch_group group
)
39784 return group
== disp_branch
;
39787 /* This function is called when the end of a window processing is reached. */
39790 process_end_window (void)
39792 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
39793 if (dispatch_window_list
->next
)
39795 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
39796 gcc_assert (dispatch_window_list
->window_size
39797 + dispatch_window_list1
->window_size
<= 48);
39803 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
39804 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
39805 for 48 bytes of instructions. Note that these windows are not dispatch
39806 windows that their sizes are DISPATCH_WINDOW_SIZE. */
39808 static dispatch_windows
*
39809 allocate_next_window (int window_num
)
39811 if (window_num
== 0)
39813 if (dispatch_window_list
->next
)
39816 return dispatch_window_list
;
39819 dispatch_window_list
->next
= dispatch_window_list1
;
39820 dispatch_window_list1
->prev
= dispatch_window_list
;
39822 return dispatch_window_list1
;
39825 /* Increment the number of immediate operands of an instruction. */
39828 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
39833 switch ( GET_CODE (*in_rtx
))
39838 (imm_values
->imm
)++;
39839 if (x86_64_immediate_operand (*in_rtx
, SImode
))
39840 (imm_values
->imm32
)++;
39842 (imm_values
->imm64
)++;
39846 (imm_values
->imm
)++;
39847 (imm_values
->imm64
)++;
39851 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
39853 (imm_values
->imm
)++;
39854 (imm_values
->imm32
)++;
39865 /* Compute number of immediate operands of an instruction. */
39868 find_constant (rtx in_rtx
, imm_info
*imm_values
)
39870 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
39871 (rtx_function
) find_constant_1
, (void *) imm_values
);
39874 /* Return total size of immediate operands of an instruction along with number
39875 of corresponding immediate-operands. It initializes its parameters to zero
39876 befor calling FIND_CONSTANT.
39877 INSN is the input instruction. IMM is the total of immediates.
39878 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
39882 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
39884 imm_info imm_values
= {0, 0, 0};
39886 find_constant (insn
, &imm_values
);
39887 *imm
= imm_values
.imm
;
39888 *imm32
= imm_values
.imm32
;
39889 *imm64
= imm_values
.imm64
;
39890 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
39893 /* This function indicates if an operand of an instruction is an
39897 has_immediate (rtx insn
)
39899 int num_imm_operand
;
39900 int num_imm32_operand
;
39901 int num_imm64_operand
;
39904 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
39905 &num_imm64_operand
);
39909 /* Return single or double path for instructions. */
39911 static enum insn_path
39912 get_insn_path (rtx insn
)
39914 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
39916 if ((int)path
== 0)
39917 return path_single
;
39919 if ((int)path
== 1)
39920 return path_double
;
39925 /* Return insn dispatch group. */
39927 static enum dispatch_group
39928 get_insn_group (rtx insn
)
39930 enum dispatch_group group
= get_mem_group (insn
);
39934 if (is_branch (insn
))
39935 return disp_branch
;
39940 if (has_immediate (insn
))
39943 if (is_prefetch (insn
))
39944 return disp_prefetch
;
39946 return disp_no_group
;
39949 /* Count number of GROUP restricted instructions in a dispatch
39950 window WINDOW_LIST. */
39953 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
39955 enum dispatch_group group
= get_insn_group (insn
);
39957 int num_imm_operand
;
39958 int num_imm32_operand
;
39959 int num_imm64_operand
;
39961 if (group
== disp_no_group
)
39964 if (group
== disp_imm
)
39966 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
39967 &num_imm64_operand
);
39968 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
39969 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
39970 || (num_imm32_operand
> 0
39971 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
39972 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
39973 || (num_imm64_operand
> 0
39974 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
39975 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
39976 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
39977 && num_imm64_operand
> 0
39978 && ((window_list
->num_imm_64
> 0
39979 && window_list
->num_insn
>= 2)
39980 || window_list
->num_insn
>= 3)))
39986 if ((group
== disp_load_store
39987 && (window_list
->num_loads
>= MAX_LOAD
39988 || window_list
->num_stores
>= MAX_STORE
))
39989 || ((group
== disp_load
39990 || group
== disp_prefetch
)
39991 && window_list
->num_loads
>= MAX_LOAD
)
39992 || (group
== disp_store
39993 && window_list
->num_stores
>= MAX_STORE
))
39999 /* This function returns true if insn satisfies dispatch rules on the
40000 last window scheduled. */
40003 fits_dispatch_window (rtx insn
)
40005 dispatch_windows
*window_list
= dispatch_window_list
;
40006 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
40007 unsigned int num_restrict
;
40008 enum dispatch_group group
= get_insn_group (insn
);
40009 enum insn_path path
= get_insn_path (insn
);
40012 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
40013 instructions should be given the lowest priority in the
40014 scheduling process in Haifa scheduler to make sure they will be
40015 scheduled in the same dispatch window as the reference to them. */
40016 if (group
== disp_jcc
|| group
== disp_cmp
)
40019 /* Check nonrestricted. */
40020 if (group
== disp_no_group
|| group
== disp_branch
)
40023 /* Get last dispatch window. */
40024 if (window_list_next
)
40025 window_list
= window_list_next
;
40027 if (window_list
->window_num
== 1)
40029 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
40032 || (min_insn_size (insn
) + sum
) >= 48)
40033 /* Window 1 is full. Go for next window. */
40037 num_restrict
= count_num_restricted (insn
, window_list
);
40039 if (num_restrict
> num_allowable_groups
[group
])
40042 /* See if it fits in the first window. */
40043 if (window_list
->window_num
== 0)
40045 /* The first widow should have only single and double path
40047 if (path
== path_double
40048 && (window_list
->num_uops
+ 2) > MAX_INSN
)
40050 else if (path
!= path_single
)
40056 /* Add an instruction INSN with NUM_UOPS micro-operations to the
40057 dispatch window WINDOW_LIST. */
40060 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
40062 int byte_len
= min_insn_size (insn
);
40063 int num_insn
= window_list
->num_insn
;
40065 sched_insn_info
*window
= window_list
->window
;
40066 enum dispatch_group group
= get_insn_group (insn
);
40067 enum insn_path path
= get_insn_path (insn
);
40068 int num_imm_operand
;
40069 int num_imm32_operand
;
40070 int num_imm64_operand
;
40072 if (!window_list
->violation
&& group
!= disp_cmp
40073 && !fits_dispatch_window (insn
))
40074 window_list
->violation
= true;
40076 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
40077 &num_imm64_operand
);
40079 /* Initialize window with new instruction. */
40080 window
[num_insn
].insn
= insn
;
40081 window
[num_insn
].byte_len
= byte_len
;
40082 window
[num_insn
].group
= group
;
40083 window
[num_insn
].path
= path
;
40084 window
[num_insn
].imm_bytes
= imm_size
;
40086 window_list
->window_size
+= byte_len
;
40087 window_list
->num_insn
= num_insn
+ 1;
40088 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
40089 window_list
->imm_size
+= imm_size
;
40090 window_list
->num_imm
+= num_imm_operand
;
40091 window_list
->num_imm_32
+= num_imm32_operand
;
40092 window_list
->num_imm_64
+= num_imm64_operand
;
40094 if (group
== disp_store
)
40095 window_list
->num_stores
+= 1;
40096 else if (group
== disp_load
40097 || group
== disp_prefetch
)
40098 window_list
->num_loads
+= 1;
40099 else if (group
== disp_load_store
)
40101 window_list
->num_stores
+= 1;
40102 window_list
->num_loads
+= 1;
40106 /* Adds a scheduled instruction, INSN, to the current dispatch window.
40107 If the total bytes of instructions or the number of instructions in
40108 the window exceed allowable, it allocates a new window. */
40111 add_to_dispatch_window (rtx insn
)
40114 dispatch_windows
*window_list
;
40115 dispatch_windows
*next_list
;
40116 dispatch_windows
*window0_list
;
40117 enum insn_path path
;
40118 enum dispatch_group insn_group
;
40126 if (INSN_CODE (insn
) < 0)
40129 byte_len
= min_insn_size (insn
);
40130 window_list
= dispatch_window_list
;
40131 next_list
= window_list
->next
;
40132 path
= get_insn_path (insn
);
40133 insn_group
= get_insn_group (insn
);
40135 /* Get the last dispatch window. */
40137 window_list
= dispatch_window_list
->next
;
40139 if (path
== path_single
)
40141 else if (path
== path_double
)
40144 insn_num_uops
= (int) path
;
40146 /* If current window is full, get a new window.
40147 Window number zero is full, if MAX_INSN uops are scheduled in it.
40148 Window number one is full, if window zero's bytes plus window
40149 one's bytes is 32, or if the bytes of the new instruction added
40150 to the total makes it greater than 48, or it has already MAX_INSN
40151 instructions in it. */
40152 num_insn
= window_list
->num_insn
;
40153 num_uops
= window_list
->num_uops
;
40154 window_num
= window_list
->window_num
;
40155 insn_fits
= fits_dispatch_window (insn
);
40157 if (num_insn
>= MAX_INSN
40158 || num_uops
+ insn_num_uops
> MAX_INSN
40161 window_num
= ~window_num
& 1;
40162 window_list
= allocate_next_window (window_num
);
40165 if (window_num
== 0)
40167 add_insn_window (insn
, window_list
, insn_num_uops
);
40168 if (window_list
->num_insn
>= MAX_INSN
40169 && insn_group
== disp_branch
)
40171 process_end_window ();
40175 else if (window_num
== 1)
40177 window0_list
= window_list
->prev
;
40178 sum
= window0_list
->window_size
+ window_list
->window_size
;
40180 || (byte_len
+ sum
) >= 48)
40182 process_end_window ();
40183 window_list
= dispatch_window_list
;
40186 add_insn_window (insn
, window_list
, insn_num_uops
);
40189 gcc_unreachable ();
40191 if (is_end_basic_block (insn_group
))
40193 /* End of basic block is reached do end-basic-block process. */
40194 process_end_window ();
40199 /* Print the dispatch window, WINDOW_NUM, to FILE. */
40201 DEBUG_FUNCTION
static void
40202 debug_dispatch_window_file (FILE *file
, int window_num
)
40204 dispatch_windows
*list
;
40207 if (window_num
== 0)
40208 list
= dispatch_window_list
;
40210 list
= dispatch_window_list1
;
40212 fprintf (file
, "Window #%d:\n", list
->window_num
);
40213 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
40214 list
->num_insn
, list
->num_uops
, list
->window_size
);
40215 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
40216 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
40218 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
40220 fprintf (file
, " insn info:\n");
40222 for (i
= 0; i
< MAX_INSN
; i
++)
40224 if (!list
->window
[i
].insn
)
40226 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
40227 i
, group_name
[list
->window
[i
].group
],
40228 i
, (void *)list
->window
[i
].insn
,
40229 i
, list
->window
[i
].path
,
40230 i
, list
->window
[i
].byte_len
,
40231 i
, list
->window
[i
].imm_bytes
);
40235 /* Print to stdout a dispatch window. */
40237 DEBUG_FUNCTION
void
40238 debug_dispatch_window (int window_num
)
40240 debug_dispatch_window_file (stdout
, window_num
);
40243 /* Print INSN dispatch information to FILE. */
40245 DEBUG_FUNCTION
static void
40246 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
40249 enum insn_path path
;
40250 enum dispatch_group group
;
40252 int num_imm_operand
;
40253 int num_imm32_operand
;
40254 int num_imm64_operand
;
40256 if (INSN_CODE (insn
) < 0)
40259 byte_len
= min_insn_size (insn
);
40260 path
= get_insn_path (insn
);
40261 group
= get_insn_group (insn
);
40262 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
40263 &num_imm64_operand
);
40265 fprintf (file
, " insn info:\n");
40266 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
40267 group_name
[group
], path
, byte_len
);
40268 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
40269 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
40272 /* Print to STDERR the status of the ready list with respect to
40273 dispatch windows. */
40275 DEBUG_FUNCTION
void
40276 debug_ready_dispatch (void)
40279 int no_ready
= number_in_ready ();
40281 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
40283 for (i
= 0; i
< no_ready
; i
++)
40284 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
40287 /* This routine is the driver of the dispatch scheduler. */
40290 do_dispatch (rtx insn
, int mode
)
40292 if (mode
== DISPATCH_INIT
)
40293 init_dispatch_sched ();
40294 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
40295 add_to_dispatch_window (insn
);
40298 /* Return TRUE if Dispatch Scheduling is supported. */
40301 has_dispatch (rtx insn
, int action
)
40303 if ((TARGET_BDVER1
|| TARGET_BDVER2
)
40304 && flag_dispatch_scheduler
)
40310 case IS_DISPATCH_ON
:
40315 return is_cmp (insn
);
40317 case DISPATCH_VIOLATION
:
40318 return dispatch_violation ();
40320 case FITS_DISPATCH_WINDOW
:
40321 return fits_dispatch_window (insn
);
40327 /* Implementation of reassociation_width target hook used by
40328 reassoc phase to identify parallelism level in reassociated
40329 tree. Statements tree_code is passed in OPC. Arguments type
40332 Currently parallel reassociation is enabled for Atom
40333 processors only and we set reassociation width to be 2
40334 because Atom may issue up to 2 instructions per cycle.
40336 Return value should be fixed if parallel reassociation is
40337 enabled for other processors. */
40340 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
40341 enum machine_mode mode
)
40345 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
40347 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
40353 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
40354 place emms and femms instructions. */
40356 static enum machine_mode
40357 ix86_preferred_simd_mode (enum machine_mode mode
)
40365 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
40367 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
40369 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
40371 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
40374 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
40380 if (!TARGET_VECTORIZE_DOUBLE
)
40382 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
40384 else if (TARGET_SSE2
)
40393 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
40396 static unsigned int
40397 ix86_autovectorize_vector_sizes (void)
40399 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
40402 /* Implement targetm.vectorize.init_cost. */
40405 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
40407 unsigned *cost
= XNEWVEC (unsigned, 3);
40408 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
40412 /* Implement targetm.vectorize.add_stmt_cost. */
40415 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
40416 struct _stmt_vec_info
*stmt_info
, int misalign
,
40417 enum vect_cost_model_location where
)
40419 unsigned *cost
= (unsigned *) data
;
40420 unsigned retval
= 0;
40422 if (flag_vect_cost_model
)
40424 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
40425 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
40427 /* Statements in an inner loop relative to the loop being
40428 vectorized are weighted more heavily. The value here is
40429 arbitrary and could potentially be improved with analysis. */
40430 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
40431 count
*= 50; /* FIXME. */
40433 retval
= (unsigned) (count
* stmt_cost
);
40434 cost
[where
] += retval
;
40440 /* Implement targetm.vectorize.finish_cost. */
40443 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
40444 unsigned *body_cost
, unsigned *epilogue_cost
)
40446 unsigned *cost
= (unsigned *) data
;
40447 *prologue_cost
= cost
[vect_prologue
];
40448 *body_cost
= cost
[vect_body
];
40449 *epilogue_cost
= cost
[vect_epilogue
];
40452 /* Implement targetm.vectorize.destroy_cost_data. */
40455 ix86_destroy_cost_data (void *data
)
40460 /* Validate target specific memory model bits in VAL. */
40462 static unsigned HOST_WIDE_INT
40463 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
40465 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
40466 unsigned HOST_WIDE_INT strong
;
40468 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
40470 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
40472 warning (OPT_Winvalid_memory_model
,
40473 "Unknown architecture specific memory model");
40474 return MEMMODEL_SEQ_CST
;
40476 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
40477 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
40479 warning (OPT_Winvalid_memory_model
,
40480 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
40481 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
40483 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
40485 warning (OPT_Winvalid_memory_model
,
40486 "HLE_RELEASE not used with RELEASE or stronger memory model");
40487 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
40492 /* Initialize the GCC target structure. */
40493 #undef TARGET_RETURN_IN_MEMORY
40494 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
40496 #undef TARGET_LEGITIMIZE_ADDRESS
40497 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
40499 #undef TARGET_ATTRIBUTE_TABLE
40500 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
40501 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
40502 # undef TARGET_MERGE_DECL_ATTRIBUTES
40503 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
40506 #undef TARGET_COMP_TYPE_ATTRIBUTES
40507 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
40509 #undef TARGET_INIT_BUILTINS
40510 #define TARGET_INIT_BUILTINS ix86_init_builtins
40511 #undef TARGET_BUILTIN_DECL
40512 #define TARGET_BUILTIN_DECL ix86_builtin_decl
40513 #undef TARGET_EXPAND_BUILTIN
40514 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
40516 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
40517 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
40518 ix86_builtin_vectorized_function
40520 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
40521 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
40523 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
40524 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
40526 #undef TARGET_VECTORIZE_BUILTIN_GATHER
40527 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
40529 #undef TARGET_BUILTIN_RECIPROCAL
40530 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
40532 #undef TARGET_ASM_FUNCTION_EPILOGUE
40533 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
40535 #undef TARGET_ENCODE_SECTION_INFO
40536 #ifndef SUBTARGET_ENCODE_SECTION_INFO
40537 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
40539 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
40542 #undef TARGET_ASM_OPEN_PAREN
40543 #define TARGET_ASM_OPEN_PAREN ""
40544 #undef TARGET_ASM_CLOSE_PAREN
40545 #define TARGET_ASM_CLOSE_PAREN ""
40547 #undef TARGET_ASM_BYTE_OP
40548 #define TARGET_ASM_BYTE_OP ASM_BYTE
40550 #undef TARGET_ASM_ALIGNED_HI_OP
40551 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
40552 #undef TARGET_ASM_ALIGNED_SI_OP
40553 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
40555 #undef TARGET_ASM_ALIGNED_DI_OP
40556 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
40559 #undef TARGET_PROFILE_BEFORE_PROLOGUE
40560 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
40562 #undef TARGET_ASM_UNALIGNED_HI_OP
40563 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
40564 #undef TARGET_ASM_UNALIGNED_SI_OP
40565 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
40566 #undef TARGET_ASM_UNALIGNED_DI_OP
40567 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
40569 #undef TARGET_PRINT_OPERAND
40570 #define TARGET_PRINT_OPERAND ix86_print_operand
40571 #undef TARGET_PRINT_OPERAND_ADDRESS
40572 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
40573 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
40574 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
40575 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
40576 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
40578 #undef TARGET_SCHED_INIT_GLOBAL
40579 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
40580 #undef TARGET_SCHED_ADJUST_COST
40581 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
40582 #undef TARGET_SCHED_ISSUE_RATE
40583 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
40584 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
40585 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
40586 ia32_multipass_dfa_lookahead
40588 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
40589 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
40591 #undef TARGET_MEMMODEL_CHECK
40592 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
40595 #undef TARGET_HAVE_TLS
40596 #define TARGET_HAVE_TLS true
40598 #undef TARGET_CANNOT_FORCE_CONST_MEM
40599 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
40600 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
40601 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
40603 #undef TARGET_DELEGITIMIZE_ADDRESS
40604 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
40606 #undef TARGET_MS_BITFIELD_LAYOUT_P
40607 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
40610 #undef TARGET_BINDS_LOCAL_P
40611 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
40613 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
40614 #undef TARGET_BINDS_LOCAL_P
40615 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
40618 #undef TARGET_ASM_OUTPUT_MI_THUNK
40619 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
/* Target hook overrides, part 1: thunk emission, file prologue, option
   handling, cost models, condition-code handling, varargs/va_list, and the
   argument-passing (calling convention) hooks.  Each hook slot in targetm is
   redirected to its ix86_* implementation via the TARGET_* macro protocol:
   #undef the generic default, then #define it to the i386 routine before
   TARGET_INITIALIZER is expanded below.  */
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

/* RTL cost models used by the optimizers.  */
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

/* va_list construction and interpretation (the 64-bit ABI uses a
   record-typed va_list, so several hooks are needed).  */
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

/* Argument passing / calling-convention hooks.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* NOTE(review): upstream wraps the following pair in #ifdef HAVE_AS_TLS;
   the guard appears to have been lost in this copy — confirm against the
   pristine source before assuming TLS-dtprel output is unconditional.  */
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let the OS/subtarget port install its own attribute handling if it
   provides one.  The #ifdef opened here was never closed in this copy of
   the file (no matching #endif exists anywhere below), which is a hard
   preprocessor error; restore the #endif immediately after the block it
   guards.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* Target hook overrides, part 2: mangling, stack protection, function
   return values, register-class/reload decisions, the vectorizer cost
   model, per-function target options, and address legitimacy.  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

/* Register-class and reload decisions.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Vectorizer cost model and SIMD-mode selection.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

/* Per-function target options (attribute((target)) / #pragma GCC target).  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

/* Addressing and frame/eliminable-register queries.  */
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* Darwin renames a handful of library builtins; this override must only be
   active for Mach-O targets.  Upstream guards it with #if TARGET_MACHO and
   the guard was dropped in this copy — without it, non-Darwin builds would
   reference darwin_rename_builtins unconditionally.  Restore the guard.  */
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
40824 struct gcc_target targetm
= TARGET_INITIALIZER
;
40826 #include "gt-i386.h"